# starbucks_summary / multipleCommentOutputs.py
# Author: binliu
# Commit: "commit streamlit" (2a0a140)
import json
import pandas as pd
from dotenv import load_dotenv
from model import LLMChain_test_many_output, LLMChain_test_outputs
def excel_2_csv(excel_file, csv_file):
    """Convert an Excel workbook to a CSV file.

    The workbook at ``excel_file`` is read with ``pandas.read_excel`` using
    its default options, and the resulting frame is written to ``csv_file``
    without the index column.
    """
    pd.read_excel(excel_file).to_csv(csv_file, index=False)
def summary_outputs(context, result_count):
    """Produce the overall summary for a set of per-batch results.

    Both arguments are forwarded untouched to ``LLMChain_test_outputs`` and
    whatever that helper returns is handed back to the caller.
    """
    overall_summary = LLMChain_test_outputs(context, result_count)
    return overall_summary
def _extract_json_payload(reply):
    """Return the JSON text embedded in an LLM reply, without its Markdown fence."""
    head, fence, tail = reply.partition("```json")
    # With a fence, keep only what precedes the closing ```; without one, fall
    # back to the whole reply so that a bare JSON answer still parses.
    body = tail.partition("```")[0] if fence else head
    return body.strip().strip("`").strip()


def _percentage(part, total):
    """Return ``part / total`` as a percentage, or 0.0 when ``total`` is zero."""
    if not total:
        return 0.0
    # The outer round() removes binary-float noise such as 56.99999999999999.
    return round(round(part / total, 2) * 100, 2)


def multiple_comment_outputs(csv_file, batch_count=None, batch_size=10):
    """Classify the comments of a CSV file in batches and aggregate the result.

    The ``评论内容`` column of ``csv_file`` is read, cut down to the first
    ``batch_count`` rows, and sent to ``LLMChain_test_many_output`` in groups
    of ``batch_size`` numbered comments. Each reply is expected to contain a
    JSON object (optionally wrapped in a ```json fence) holding a
    ``"summary"`` entry, a ``"count"`` entry with the keys ``正面`` / ``负面``
    / ``无关``, and one entry per comment number.
    NOTE(review): that reply schema is inferred from how the reply is
    consumed here; confirm against the prompt in ``model``.

    Args:
        csv_file: Path (or anything ``pandas.read_csv`` accepts) of the CSV.
        batch_count: Maximum number of comments to process. A falsy value
            (``None`` or ``0``) means "all comments".
        batch_size: Number of comments sent to the model per request.

    Returns:
        A dict with:
        ``data`` - one dict per batch mapping comment number to the model's
        entry for it, each augmented with the original text under
        ``"comment"``;
        ``positive`` / ``negative`` / ``neutral`` - share of each category as
        a percentage (0.0 when nothing was counted);
        ``summary`` - the result of ``summary_outputs`` over all batch
        summaries, or ``""`` when there were no comments to process.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        KeyError: If the CSV has no ``评论内容`` column.
        json.JSONDecodeError: If a model reply does not contain valid JSON.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    df = pd.read_csv(csv_file)
    column = df['评论内容']
    if not batch_count:
        batch_count = len(column)
    # Truncate once, up front, so the last batch can never run past
    # batch_count (slicing the full column per batch used to leak extra rows).
    comments = column.iloc[:batch_count].tolist()

    if not comments:
        # Nothing to classify: skip the model calls and avoid dividing by zero.
        return {
            "data": [],
            "positive": 0.0,
            "negative": 0.0,
            "neutral": 0.0,
            "summary": "",
        }

    data = []
    summaries = []
    result_count = []
    for start in range(0, len(comments), batch_size):
        batch = comments[start:start + batch_size]
        # Comments are numbered globally and 1-based; the same numbers are the
        # keys used to match the model's per-comment entries back to the text.
        numbered = [
            (str(start + offset + 1), comment)
            for offset, comment in enumerate(batch)
        ]
        batch_dict = dict(numbered)
        text = "\n".join(
            "{}.{}".format(number, comment) for number, comment in numbered
        )

        res = LLMChain_test_many_output(text, len(batch))
        json_data = json.loads(_extract_json_payload(res))

        # Pop with defaults so a reply lacking either entry degrades to
        # "no summary / no counts" instead of failing later in join()/sum().
        batch_summary = json_data.pop("summary", None)
        batch_counts = json_data.pop("count", None)
        if batch_summary:
            summaries.append(str(batch_summary))
        result_count.append(batch_counts if isinstance(batch_counts, dict) else {})

        # What remains are the per-comment entries; attach the source text.
        for key, value in json_data.items():
            if isinstance(value, dict):
                value['comment'] = batch_dict.get(key, '')
        data.append(json_data)

    total_positive = sum(item.get("正面", 0) for item in result_count)
    total_negative = sum(item.get("负面", 0) for item in result_count)
    total_indifference = sum(item.get("无关", 0) for item in result_count)
    total = total_positive + total_negative + total_indifference

    summary = summary_outputs(";".join(summaries), result_count)
    return {
        "data": data,
        "positive": _percentage(total_positive, total),
        "negative": _percentage(total_negative, total),
        "neutral": _percentage(total_indifference, total),
        "summary": summary,
    }
if __name__ == "__main__":
    # Pull settings from a local .env file into the environment before any
    # model call is made.
    load_dotenv()

    excel_file = "test.xlsx"
    csv_file = "example.csv"
    # To regenerate the CSV from the spreadsheet, run this first:
    # excel_2_csv(excel_file, csv_file)
    multiple_comment_outputs(csv_file)