97 lines
6.2 KiB
Python
97 lines
6.2 KiB
Python
# from datasets import load_dataset
|
|
#
|
|
# dataset_paths = [
|
|
# ['ceval/ceval-exam',
|
|
# ['computer_network', 'operating_system', 'computer_architecture', 'college_programming', 'college_physics',
|
|
# 'college_chemistry', 'advanced_mathematics', 'probability_and_statistics', 'discrete_mathematics',
|
|
# 'electrical_engineer', 'metrology_engineer', 'high_school_mathematics', 'high_school_physics',
|
|
# 'high_school_chemistry', 'high_school_biology', 'middle_school_mathematics', 'middle_school_biology',
|
|
# 'middle_school_physics', 'middle_school_chemistry', 'veterinary_medicine', 'college_economics',
|
|
# 'business_administration', 'marxism', 'mao_zedong_thought', 'education_science', 'teacher_qualification',
|
|
# 'high_school_politics', 'high_school_geography', 'middle_school_politics', 'middle_school_geography',
|
|
# 'modern_chinese_history', 'ideological_and_moral_cultivation', 'logic', 'law', 'chinese_language_and_literature',
|
|
# 'art_studies', 'professional_tour_guide', 'legal_professional', 'high_school_chinese', 'high_school_history',
|
|
# 'middle_school_history', 'civil_servant', 'sports_science', 'plant_protection', 'basic_medicine',
|
|
# 'clinical_medicine', 'urban_and_rural_planner', 'accountant', 'fire_engineer',
|
|
# 'environmental_impact_assessment_engineer', 'tax_accountant', 'physician']], # ceval*
|
|
# ['haonan-li/cmmlu', [
|
|
# 'agronomy', 'anatomy', 'ancient_chinese', 'arts', 'astronomy', 'business_ethics',
|
|
# 'chinese_civil_service_exam', 'chinese_driving_rule', 'chinese_food_culture',
|
|
# 'chinese_foreign_policy', 'chinese_history', 'chinese_literature',
|
|
# 'chinese_teacher_qualification', 'clinical_knowledge', 'college_actuarial_science',
|
|
# 'college_education', 'college_engineering_hydrology', 'college_law',
|
|
# 'college_mathematics', 'college_medical_statistics', 'college_medicine',
|
|
# 'computer_science', 'computer_security', 'conceptual_physics',
|
|
# 'construction_project_management', 'economics', 'education', 'electrical_engineering',
|
|
# 'elementary_chinese', 'elementary_commonsense', 'elementary_information_and_technology',
|
|
# 'elementary_mathematics', 'ethnology', 'food_science', 'genetics', 'global_facts',
|
|
# 'high_school_biology', 'high_school_chemistry', 'high_school_geography',
|
|
# 'high_school_mathematics', 'high_school_physics', 'high_school_politics',
|
|
# 'human_sexuality', 'international_law', 'journalism', 'jurisprudence',
|
|
# 'legal_and_moral_basis', 'logical', 'machine_learning', 'management', 'marketing',
|
|
# 'marxist_theory', 'modern_chinese', 'nutrition', 'philosophy', 'professional_accounting',
|
|
# 'professional_law', 'professional_medicine', 'professional_psychology',
|
|
# 'public_relations', 'security_study', 'sociology', 'sports_science',
|
|
# 'traditional_chinese_medicine', 'virology', 'world_history', 'world_religions'
|
|
# ]], # cmmlu*
|
|
# ['tyouisen/aclue',
|
|
# ['polysemy_resolution', 'poetry_sentiment_analysis', 'named_entity_recognition', 'basic_ancient_chinese',
|
|
# 'poetry_context_prediction', 'sentence_segmentation', 'couplet_prediction', 'poetry_appreciate',
|
|
# 'ancient_chinese_culture', 'ancient_phonetics', 'homographic_character_resolution', 'ancient_literature',
|
|
# 'ancient_medical', 'poetry_quality_assessment', 'reading_comprehension']], # aclue
|
|
# ['juletxara/mgsm', ['zh']], # mgsm_direct_zh
|
|
# ['openbookqa', ['main']], # openbookqa
|
|
# ['ZoneTwelve/tmmluplus',
|
|
# ['dentistry', 'traditional_chinese_medicine_clinical_medicine', 'clinical_psychology', 'technical',
|
|
# 'culinary_skills', 'mechanical', 'logic_reasoning', 'real_estate', 'general_principles_of_law', 'finance_banking',
|
|
# 'anti_money_laundering', 'ttqav2', 'marketing_management', 'business_management', 'organic_chemistry',
|
|
# 'advance_chemistry', 'physics', 'secondary_physics', 'human_behavior', 'national_protection', 'jce_humanities',
|
|
# 'politic_science', 'agriculture', 'official_document_management', 'financial_analysis', 'pharmacy',
|
|
# 'educational_psychology', 'statistics_and_machine_learning', 'management_accounting', 'introduction_to_law',
|
|
# 'computer_science', 'veterinary_pathology', 'accounting', 'fire_science', 'optometry', 'insurance_studies',
|
|
# 'pharmacology', 'taxation', 'education_(profession_level)', 'economics', 'veterinary_pharmacology',
|
|
# 'nautical_science', 'occupational_therapy_for_psychological_disorders', 'trust_practice', 'geography_of_taiwan',
|
|
# 'physical_education', 'auditing', 'administrative_law', 'basic_medical_science', 'macroeconomics', 'trade',
|
|
# 'chinese_language_and_literature', 'tve_design', 'junior_science_exam', 'junior_math_exam', 'junior_chinese_exam',
|
|
# 'junior_social_studies', 'tve_mathematics', 'tve_chinese_language', 'tve_natural_sciences', 'junior_chemistry',
|
|
# 'music', 'education', 'three_principles_of_people', 'taiwanese_hokkien', 'engineering_math', 'linear_algebra']]
|
|
# # tmmluplus
|
|
#
|
|
# ]
|
|
#
|
|
# for dataset_path in dataset_paths:
|
|
# for dataset_name in dataset_path[1]:
|
|
# datasets = load_dataset(dataset_path[0], dataset_name, cache_dir='./test_dataset_cache')
|
|
#
|
|
# """
|
|
# export HF_HUB_OFFLINE=1 && lm_eval --model hf --model_args pretrained=/xxx/minimind/minimind-v2-small/,device=cuda,dtype=auto --tasks ceval* --batch_size 8 --trust_remote_code
|
|
# """
|
|
"""
|
|
$env:HF_HUB_OFFLINE=1; lm_eval --model hf --model_args pretrained=../minimind-v2-small/,device=cuda,dtype=auto --tasks ceval* --batch_size 8 --trust_remote_code
|
|
"""
|
|
|
|
import os
import subprocess
|
|
|
|
# Command to execute, given as an argument list so that no shell is involved:
# the task pattern "ceval*" reaches lm_eval verbatim (no shell globbing) and
# the invocation is identical on Windows, Linux and macOS.
command = [
    "lm_eval",
    "--model", "hf",
    "--model_args", "pretrained=../minimind-v2-small/,device=cuda,dtype=auto",
    "--tasks", "ceval*",
    "--batch_size", "8",
    "--trust_remote_code",
]

# Hand HF_HUB_OFFLINE to the child through its environment instead of
# prefixing the command with `set HF_HUB_OFFLINE=1 & ...`. That prefix is
# cmd.exe-only syntax, and even in cmd.exe the space before `&` ends up in
# the value ("1 "); in a POSIX shell `set` does not export the variable.
child_env = {**os.environ, "HF_HUB_OFFLINE": "1"}

# Run the evaluation and capture its output so it can be reported below.
try:
    process = subprocess.run(
        command,
        check=True,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=child_env,
    )
except FileNotFoundError as e:
    # Without a shell, a missing executable raises instead of yielding a
    # non-zero exit status, so report it explicitly.
    print(f"lm_eval executable not found: {e}")
except subprocess.CalledProcessError as e:
    print(f"命令执行失败,返回码: {e.returncode}")
    # Show whatever the command wrote before failing; stdout often holds
    # the context needed to interpret the error.
    print("STDOUT:", e.stdout)
    print("STDERR:", e.stderr)
else:
    # Print the command's output.
    print("STDOUT:", process.stdout)
    print("STDERR:", process.stderr)