"""Batch sentiment analysis of review comments through LLM chains."""
import json
import re

import pandas as pd
from dotenv import load_dotenv

from model import LLMChain_test_many_output, LLMChain_test_outputs

# Number of comments sent to the LLM in one request.
_BATCH_SIZE = 10

# Keys read from each batch's "count" object (positive / negative / unrelated).
_POSITIVE = "正面"
_NEGATIVE = "负面"
_IRRELEVANT = "无关"

# Captures the payload of a ```json fence; tolerates a missing closing fence.
_JSON_FENCE = re.compile(r"```json\s*(.*?)(?:```|\Z)", re.DOTALL)


def excel_2_csv(excel_file, csv_file):
    """Convert the Excel workbook *excel_file* into the CSV file *csv_file*.

    The DataFrame index is not written to the CSV.
    """
    df = pd.read_excel(excel_file)
    df.to_csv(csv_file, index=False)


def summary_outputs(context, result_count):
    """Return the result of ``LLMChain_test_outputs(context, result_count)``."""
    return LLMChain_test_outputs(context, result_count)


def _extract_json(reply):
    """Parse the JSON object embedded in an LLM reply.

    A ```json fenced section is preferred; when there is none, the whole
    reply (with surrounding whitespace and backticks removed) is parsed.

    Raises:
        json.JSONDecodeError: the extracted text is not valid JSON.
        ValueError: the JSON value is not an object.
    """
    match = _JSON_FENCE.search(reply)
    payload = match.group(1) if match else reply.strip().strip("`")
    parsed = json.loads(payload.strip())
    if not isinstance(parsed, dict):
        raise ValueError(
            f"expected a JSON object in the LLM reply, got {type(parsed).__name__}"
        )
    return parsed


def _percentage(part, total):
    """Return *part* as a whole-number percentage of *total*, as a float.

    Rounding happens after scaling to 100 so that binary floating point
    cannot leak values such as 56.99999999999999; a zero total yields 0.0
    instead of raising ``ZeroDivisionError``.
    """
    if not total:
        return 0.0
    return float(round(part / total * 100))


def multiple_comment_outputs(csv_file, batch_count=None):
    """Classify the comments in *csv_file* in batches and aggregate the results.

    Comments are read from the ``评论内容`` column, numbered from 1, and sent
    to ``LLMChain_test_many_output`` in groups of ``_BATCH_SIZE``. Each reply
    must contain a JSON object; its ``summary`` and ``count`` entries are
    collected for the aggregate, and every remaining dict entry gets the
    original comment attached under ``"comment"`` (looked up by its number).

    Args:
        csv_file: path of the CSV file to read.
        batch_count: maximum number of comments to process. A falsy value
            (``None`` or ``0``) processes every row.

    Returns:
        A dict with ``data`` (one parsed dict per batch), ``positive``,
        ``negative`` and ``neutral`` (percentages of the summed counts) and
        ``summary`` (the result of ``summary_outputs``).

    Raises:
        json.JSONDecodeError: a reply does not contain valid JSON.
        ValueError: a reply's JSON value is not an object.
    """
    df = pd.read_csv(csv_file)
    column = df['评论内容']
    limit = batch_count or len(column)
    # Apply the limit once, up front, so the final batch cannot run past it.
    comments = column.iloc[0:limit].tolist()

    data = []
    summaries = []
    counts = []
    for start in range(0, len(comments), _BATCH_SIZE):
        batch = comments[start:start + _BATCH_SIZE]
        # Global 1-based numbering; the keys are what the LLM echoes back.
        numbered = {
            str(start + offset): comment
            for offset, comment in enumerate(batch, 1)
        }
        text = "\n".join(
            "{}.{}".format(number, comment)
            for number, comment in numbered.items()
        )
        reply = LLMChain_test_many_output(text, len(batch))
        parsed = _extract_json(reply)

        # Missing or malformed aggregate fields are skipped rather than
        # crashing the whole run on a single incomplete reply.
        summary = parsed.pop("summary", None)
        if summary:
            summaries.append(str(summary))
        count = parsed.pop("count", None)
        if isinstance(count, dict):
            counts.append(count)

        for key, value in parsed.items():
            if isinstance(value, dict):
                value['comment'] = numbered.get(key, '')
        data.append(parsed)

    total_positive = sum(item.get(_POSITIVE) or 0 for item in counts)
    total_negative = sum(item.get(_NEGATIVE) or 0 for item in counts)
    total_irrelevant = sum(item.get(_IRRELEVANT) or 0 for item in counts)
    total = total_positive + total_negative + total_irrelevant

    summary = summary_outputs(";".join(summaries), counts)
    return {
        "data": data,
        "positive": _percentage(total_positive, total),
        "negative": _percentage(total_negative, total),
        "neutral": _percentage(total_irrelevant, total),
        "summary": summary,
    }


if __name__ == '__main__':
    excel_file = 'test.xlsx'
    csv_file = 'example.csv'
    # One-off step: enable when the CSV must be regenerated from the workbook.
    # excel_2_csv(excel_file, csv_file)
    load_dotenv()
    multiple_comment_outputs(csv_file)