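"""
Evaluate a MiniMind SFT checkpoint on the C-Eval validation split.

For every subject CSV under ceval/ceval-exam/val, this script builds a chat
prompt per multiple-choice question, takes the model's next-token logits,
compares the probabilities of the option tokens 'A'/'B'/'C'/'D', picks the
most likely option, and writes per-subject results plus an overall accuracy
log under ceval/ceval_result/.
"""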
import random
import time
import os

import pandas as pd
import torch
import warnings
from transformers import AutoTokenizer, AutoModelForCausalLM
from model.model import Transformer
from model.LMConfig import LMConfig
import torch.nn.functional as F

warnings.filterwarnings('ignore')


def init_model(lm_config):
    tokenizer = AutoTokenizer.from_pretrained('./model/minimind_tokenizer',
                                              trust_remote_code=True, use_fast=False)
    model_from = 1  # 1: load local MiniMind weights, 2: load via transformers

    if model_from == 1:
        moe_path = '_moe' if lm_config.use_moe else ''
        ckp = f'./out/single_chat/full_sft_{lm_config.dim}{moe_path}.pth'

        model = Transformer(lm_config)
        # `device` is the module-level name set in the __main__ block below
        state_dict = torch.load(ckp, map_location=device)

        # Strip the '_orig_mod.' prefix that torch.compile adds to parameter names
        unwanted_prefix = '_orig_mod.'
        for k, v in list(state_dict.items()):
            if k.startswith(unwanted_prefix):
                state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
        # Load the cleaned state dict into the model
        model.load_state_dict(state_dict, strict=False)
    else:
        model = AutoModelForCausalLM.from_pretrained('minimind', trust_remote_code=True)
    model = model.to(device)

    return model, tokenizer


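# For reference: with MiniMind's default LMConfig (the dim and use_moe defaults
# are assumed here, e.g. dim=512, use_moe=False), the checkpoint path above
# resolves to ./out/single_chat/full_sft_512.pth.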
if __name__ == "__main__":
|
|||
|
# -----------------------------------------------------------------------------
|
|||
|
seed = random.randint(1, 2000)
|
|||
|
# device = 'cuda:0'
|
|||
|
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
|
|||
|
dtype = 'bfloat16'
|
|||
|
lm_config = LMConfig()
|
|||
|
# -----------------------------------------------------------------------------
|
|||
|
|
|||
|
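    # Note (added): `seed` and `dtype` are assigned but not used below; the
    # evaluation is deterministic (an argmax over option probabilities), so no
    # sampling seed or autocast dtype is needed.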
    model, tokenizer = init_model(lm_config)
    model = model.eval()

    # Message template; adjust to match your tokenizer's chat template.
    # The system message is Chinese for "Start answering the questions".
    messages_origin = [{"role": "system", "content": "开始回答问题"}]

    # Data and result directories
    File_Dir = "ceval/ceval-exam/val"
    results_dir = "ceval/ceval_result"

    # Make sure the results directory exists
    os.makedirs(results_dir, exist_ok=True)

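    # Each C-Eval val CSV holds one subject's questions, with columns
    # id, question, A, B, C, D, answer (answer is the correct option letter).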
    # Running totals across all files
    total_correct = 0
    total_questions = 0

    # Iterate over all CSV files in the directory
    for filename in os.listdir(File_Dir):
        if filename.endswith('.csv'):
            file_path = os.path.join(File_Dir, filename)
            test_df = pd.read_csv(file_path)

            # DataFrame collecting per-question results
            results_df = pd.DataFrame(columns=['question', 'A', 'B', 'C', 'D', 'answer', 'llm_answer', 'is_right'])
            total_correct_in_file = 0  # correct count for the current file

            for row in test_df.itertuples(index=True, name='Pandas'):
                id = getattr(row, 'id')
                question = getattr(row, 'question')
                A = getattr(row, 'A')
                B = getattr(row, 'B')
                C = getattr(row, 'C')
                D = getattr(row, 'D')
                right_answer = getattr(row, 'answer')

                # Chinese prompt meaning: "<question>. Choose A: ..., B: ..., C: ..., D: ..."
                prompt = f'{question}。选择 A: {A}, B: {B}, C: {C}, D: {D}'

                messages = messages_origin.copy()
                messages.append({"role": "user", "content": prompt})

                # print(messages)
                new_prompt = tokenizer.apply_chat_template(
                    messages,
                    tokenize=False,
                    add_generation_prompt=True
                )
                x = tokenizer(new_prompt).data['input_ids']
                x = torch.tensor(x, dtype=torch.long, device=device)[None, ...]
                res_ids = model.eval_answer(x)

                # Treat res_ids as the model's next-token logits and convert
                # them to a probability distribution with softmax
                probabilities = F.softmax(res_ids, dim=-1)

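                # A minimal sketch of what `eval_answer` is assumed to do (the
                # actual method lives on the MiniMind Transformer in
                # model/model.py): a no-grad forward pass that keeps only the
                # logits at the last position, so `res_ids` above has shape
                # (1, vocab_size), e.g.
                #
                #     @torch.inference_mode()
                #     def eval_answer(self, x):
                #         return self(x).logits[:, -1, :]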
                # Token id for each option letter
                A_id = tokenizer('A').data['input_ids']
                B_id = tokenizer('B').data['input_ids']
                C_id = tokenizer('C').data['input_ids']
                D_id = tokenizer('D').data['input_ids']

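                # Note (added): the lookups below index a single logit and call
                # .item(), which only works if the tokenizer maps each bare
                # option letter to exactly one token id (no BOS/prefix token).
                # Worth asserting if you swap tokenizers:
                #
                #     assert len(A_id) == 1, "option letters must be single tokens"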
                # Probability assigned to each option token
                A_prob = probabilities[0, A_id].item()
                B_prob = probabilities[0, B_id].item()
                C_prob = probabilities[0, C_id].item()
                D_prob = probabilities[0, D_id].item()

                # Collect the option probabilities in a dict for easy handling
                options_prob = {
                    'A': A_prob,
                    'B': B_prob,
                    'C': C_prob,
                    'D': D_prob
                }

                # Pick the option with the highest probability
                max_option_answer = max(options_prob, key=options_prob.get)

                # Compare with the reference answer and record the result
                is_right = 1 if max_option_answer == right_answer else 0
                # DataFrame.append was removed in pandas 2.0; build a one-row
                # frame and concatenate instead
                results_df = pd.concat([results_df, pd.DataFrame([{
                    'question': question,
                    'A': A,
                    'B': B,
                    'C': C,
                    'D': D,
                    'answer': right_answer,
                    'llm_answer': max_option_answer,
                    'is_right': is_right
                }])], ignore_index=True)
                # print(f'id: {id} question: {question[:10]}... correct: {is_right}')

                if is_right:
                    total_correct_in_file += 1

            total_correct += total_correct_in_file
            total_questions += len(test_df)

            # Compute this file's accuracy and append it as a summary row
            accuracy = total_correct_in_file / len(test_df)
            results_df = pd.concat([results_df, pd.DataFrame([{
                'question': '-',
                'A': '-',
                'B': '-',
                'C': '-',
                'D': '-',
                'answer': f'Accuracy for {filename}: {accuracy:.2%}',
                'llm_answer': '-',
                'is_right': '-'
            }])], ignore_index=True)

print(f'{filename.split(".")[0]} ,{total_correct_in_file}/{len(test_df)},正确率: {accuracy:.2%}')
|
|||
|
|
|||
|
# 保存结果到CSV
|
|||
|
results_path = os.path.join(results_dir, f"{filename.split('.')[0]}_result.csv")
|
|||
|
results_df.to_csv(results_path, index=False)
|
|||
|
|
|||
|
    # Overall accuracy across all files
    total_accuracy = total_correct / total_questions if total_questions > 0 else 0

    # Write per-file accuracies and the overall accuracy to "ceval/ceval_result/test.log"
    log_path = os.path.join(results_dir, "test.log")
    with open(log_path, 'w') as log_file:
        result = f"Total questions: {total_questions}\nTotal correct: {total_correct}\nOverall accuracy: {total_accuracy:.2%}"
        log_file.write(result + '\n')
        print(result)

        # Copy each result CSV's summary row into the log. This loop must stay
        # inside the `with` block: `log_file` is closed once the block exits.
        for filename in os.listdir(File_Dir):
            if filename.endswith('.csv'):
                accuracy_file = pd.read_csv(os.path.join(results_dir, f"{filename.split('.')[0]}_result.csv"))
                last_row = accuracy_file.iloc[-1]['answer']
                log_file.write(f"{filename}: {last_row}\n")
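
# Usage sketch (added; the script filename is assumed): from the MiniMind repo
# root, with the C-Eval val CSVs unpacked under ceval/ceval-exam/val and a
# trained checkpoint in ./out/single_chat/, run:
#
#     python eval_ceval.py
#
# Per-subject results land in ceval/ceval_result/<subject>_result.csv and the
# summary in ceval/ceval_result/test.log.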