"""Run the ``lm_eval`` command-line tool on the ``ceval*`` tasks.

The model under test is the local checkpoint ``../minimind-v2-small/``
(path relative to the current working directory).  ``HF_HUB_OFFLINE=1`` is
set for the child process only.

Equivalent shell one-liners, kept for reference:

    bash:
        export HF_HUB_OFFLINE=1 && lm_eval --model hf --model_args pretrained=/xxx/minimind/minimind-v2-small/,device=cuda,dtype=auto --tasks ceval* --batch_size 8 --trust_remote_code

    PowerShell:
        $env:HF_HUB_OFFLINE=1; lm_eval --model hf --model_args pretrained=../minimind-v2-small/,device=cuda,dtype=auto --tasks ceval* --batch_size 8 --trust_remote_code
"""

# ---------------------------------------------------------------------------
# Disabled helper, kept for reference: loads every listed dataset config with
# `datasets.load_dataset`, using ./test_dataset_cache as the cache directory.
# The trailing comment on each entry is the matching lm_eval task name/pattern.
# ---------------------------------------------------------------------------
# from datasets import load_dataset
#
# dataset_paths = [
#     ['ceval/ceval-exam',
#      ['computer_network', 'operating_system', 'computer_architecture', 'college_programming', 'college_physics',
#       'college_chemistry', 'advanced_mathematics', 'probability_and_statistics', 'discrete_mathematics',
#       'electrical_engineer', 'metrology_engineer', 'high_school_mathematics', 'high_school_physics',
#       'high_school_chemistry', 'high_school_biology', 'middle_school_mathematics', 'middle_school_biology',
#       'middle_school_physics', 'middle_school_chemistry', 'veterinary_medicine', 'college_economics',
#       'business_administration', 'marxism', 'mao_zedong_thought', 'education_science', 'teacher_qualification',
#       'high_school_politics', 'high_school_geography', 'middle_school_politics', 'middle_school_geography',
#       'modern_chinese_history', 'ideological_and_moral_cultivation', 'logic', 'law', 'chinese_language_and_literature',
#       'art_studies', 'professional_tour_guide', 'legal_professional', 'high_school_chinese', 'high_school_history',
#       'middle_school_history', 'civil_servant', 'sports_science', 'plant_protection', 'basic_medicine',
#       'clinical_medicine', 'urban_and_rural_planner', 'accountant', 'fire_engineer',
#       'environmental_impact_assessment_engineer', 'tax_accountant', 'physician']],  # ceval*
#     ['haonan-li/cmmlu', [
#         'agronomy', 'anatomy', 'ancient_chinese', 'arts', 'astronomy', 'business_ethics',
#         'chinese_civil_service_exam', 'chinese_driving_rule', 'chinese_food_culture',
#         'chinese_foreign_policy', 'chinese_history', 'chinese_literature',
#         'chinese_teacher_qualification', 'clinical_knowledge', 'college_actuarial_science',
#         'college_education', 'college_engineering_hydrology', 'college_law',
#         'college_mathematics', 'college_medical_statistics', 'college_medicine',
#         'computer_science', 'computer_security', 'conceptual_physics',
#         'construction_project_management', 'economics', 'education', 'electrical_engineering',
#         'elementary_chinese', 'elementary_commonsense', 'elementary_information_and_technology',
#         'elementary_mathematics', 'ethnology', 'food_science', 'genetics', 'global_facts',
#         'high_school_biology', 'high_school_chemistry', 'high_school_geography',
#         'high_school_mathematics', 'high_school_physics', 'high_school_politics',
#         'human_sexuality', 'international_law', 'journalism', 'jurisprudence',
#         'legal_and_moral_basis', 'logical', 'machine_learning', 'management', 'marketing',
#         'marxist_theory', 'modern_chinese', 'nutrition', 'philosophy', 'professional_accounting',
#         'professional_law', 'professional_medicine', 'professional_psychology',
#         'public_relations', 'security_study', 'sociology', 'sports_science',
#         'traditional_chinese_medicine', 'virology', 'world_history', 'world_religions'
#     ]],  # cmmlu*
#     ['tyouisen/aclue',
#      ['polysemy_resolution', 'poetry_sentiment_analysis', 'named_entity_recognition', 'basic_ancient_chinese',
#       'poetry_context_prediction', 'sentence_segmentation', 'couplet_prediction', 'poetry_appreciate',
#       'ancient_chinese_culture', 'ancient_phonetics', 'homographic_character_resolution', 'ancient_literature',
#       'ancient_medical', 'poetry_quality_assessment', 'reading_comprehension']],  # aclue
#     ['juletxara/mgsm', ['zh']],  # mgsm_direct_zh
#     ['openbookqa', ['main']],  # openbookqa
#     ['ZoneTwelve/tmmluplus',
#      ['dentistry', 'traditional_chinese_medicine_clinical_medicine', 'clinical_psychology', 'technical',
#       'culinary_skills', 'mechanical', 'logic_reasoning', 'real_estate', 'general_principles_of_law', 'finance_banking',
#       'anti_money_laundering', 'ttqav2', 'marketing_management', 'business_management', 'organic_chemistry',
#       'advance_chemistry', 'physics', 'secondary_physics', 'human_behavior', 'national_protection', 'jce_humanities',
#       'politic_science', 'agriculture', 'official_document_management', 'financial_analysis', 'pharmacy',
#       'educational_psychology', 'statistics_and_machine_learning', 'management_accounting', 'introduction_to_law',
#       'computer_science', 'veterinary_pathology', 'accounting', 'fire_science', 'optometry', 'insurance_studies',
#       'pharmacology', 'taxation', 'education_(profession_level)', 'economics', 'veterinary_pharmacology',
#       'nautical_science', 'occupational_therapy_for_psychological_disorders', 'trust_practice', 'geography_of_taiwan',
#       'physical_education', 'auditing', 'administrative_law', 'basic_medical_science', 'macroeconomics', 'trade',
#       'chinese_language_and_literature', 'tve_design', 'junior_science_exam', 'junior_math_exam', 'junior_chinese_exam',
#       'junior_social_studies', 'tve_mathematics', 'tve_chinese_language', 'tve_natural_sciences', 'junior_chemistry',
#       'music', 'education', 'three_principles_of_people', 'taiwanese_hokkien', 'engineering_math', 'linear_algebra']]
#     # tmmluplus
# ]
#
# for dataset_path in dataset_paths:
#     for dataset_name in dataset_path[1]:
#         datasets = load_dataset(dataset_path[0], dataset_name, cache_dir='./test_dataset_cache')

import os
import subprocess

# The command to execute, as an argument vector rather than a shell string.
# No shell is involved, so the literal pattern "ceval*" reaches lm_eval
# unchanged instead of being subject to glob expansion by a POSIX shell.
command = [
    "lm_eval",
    "--model", "hf",
    "--model_args", "pretrained=../minimind-v2-small/,device=cuda,dtype=auto",
    "--tasks", "ceval*",
    "--batch_size", "8",
    "--trust_remote_code",
]

# Environment for the child process: a copy of ours plus HF_HUB_OFFLINE=1.
# The previous "set HF_HUB_OFFLINE=1 & ..." prefix was cmd.exe-specific; in a
# POSIX shell `set NAME=value` does not export a variable, so the flag never
# reached lm_eval there.  Passing `env=` behaves the same on every platform
# and leaves this process's own environment untouched.
eval_env = {**os.environ, "HF_HUB_OFFLINE": "1"}

# Run the command, capturing both output streams as text.
try:
    process = subprocess.run(
        command,
        env=eval_env,
        check=True,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
except subprocess.CalledProcessError as e:
    # lm_eval started but exited with a non-zero status.
    print(f"命令执行失败,返回码: {e.returncode}")
    # Show stdout as well: output produced before the failure is captured
    # here and would otherwise be lost.
    print("STDOUT:", e.stdout)
    print("STDERR:", e.stderr)
except OSError as e:
    # Without a shell, a missing or non-executable `lm_eval` surfaces as an
    # OSError (e.g. FileNotFoundError) rather than as a non-zero exit code.
    print(f"Failed to launch lm_eval: {e}")
else:
    # Print the command's output.
    print("STDOUT:", process.stdout)
    print("STDERR:", process.stderr)