# Hugging Face Space: StarbucksCN/starbucks_summary
# Space status: Runtime error
# File size: 2,166 Bytes

import json

import pandas as pd
from dotenv import load_dotenv
from model import LLMChain_test_many_output, LLMChain_test_outputs


def excel_2_csv(excel_file,csv_file):
    """Convert an Excel workbook to a CSV file.

    Args:
        excel_file: Path (or file-like object) handed to ``pd.read_excel``.
        csv_file: Destination handed to ``DataFrame.to_csv``.

    The DataFrame index is not written to the CSV.
    """
    pd.read_excel(excel_file).to_csv(csv_file, index=False)

def summary_outputs(context, result_count):
    """Produce the overall summary by delegating to ``LLMChain_test_outputs``.

    Args:
        context: Text passed through unchanged as the first argument.
        result_count: Value passed through unchanged as the second argument.

    Returns:
        Whatever ``LLMChain_test_outputs`` returns.
    """
    overall_summary = LLMChain_test_outputs(context, result_count)
    return overall_summary

def _extract_json_payload(response):
    """Return the JSON text embedded in an LLM response string.

    The contents of the first ```json fenced block are preferred. When the
    response has no such fence, the whole (stripped) response is returned so
    that a bare JSON answer can still be parsed.
    """
    _, fence, tail = response.partition("```json")
    if not fence:
        return response.strip()
    # Cut at the closing fence so any prose after the block is ignored.
    return tail.partition("```")[0].strip()


def _percentage(part, total):
    """Return ``part`` as a whole-number percentage of ``total`` (0.0 if total is 0)."""
    if not total:
        return 0.0
    # Round after scaling: round(x, 2) * 100 leaks float noise such as
    # 28.999999999999996.
    return float(round(100 * part / total))


def multiple_comment_outputs(csv_file, batch_count=None, batch_size=10):
    """Classify the comments of a CSV file in batches and aggregate the results.

    The '评论内容' column of ``csv_file`` is read, split into batches of
    ``batch_size`` comments, and each batch is sent to
    ``LLMChain_test_many_output`` as numbered lines ("1.text", "2.text", ...;
    numbering is global across batches). Each response must contain a JSON
    object (optionally inside a ```json fence) with a "summary" entry, a
    "count" entry, and one entry per comment number.

    Args:
        csv_file: Path or buffer accepted by ``pd.read_csv``.
        batch_count: Maximum number of comments to process. A falsy value
            (``None`` or ``0``) means "process every comment".
        batch_size: Number of comments sent per LLM call (default 10).

    Returns:
        A dict with:
            "data": list with one dict per batch, mapping comment numbers to
                the per-comment result (with the original text added under
                "comment" when the result is a dict);
            "positive" / "negative" / "neutral": whole-number percentages
                computed from the "正面" (positive), "负面" (negative) and
                "无关" (irrelevant) counts; all 0.0 when the counts sum to 0;
            "summary": result of ``summary_outputs`` over the batch summaries
                joined with ";", or "" when there are no comments at all.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1, if a response does
            not decode to a JSON object, or if its JSON is malformed
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        KeyError: if the CSV has no '评论内容' column.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    df = pd.read_csv(csv_file)
    comments = df['评论内容'].tolist()
    if batch_count:
        # Truncate up front so the last batch can never run past the limit.
        comments = comments[:batch_count]

    if not comments:
        # Nothing to classify: skip the LLM entirely instead of dividing by 0.
        return {
            "data": [],
            "positive": 0.0,
            "negative": 0.0,
            "neutral": 0.0,
            "summary": "",
        }

    data = []
    summaries = []
    result_count = []
    for start in range(0, len(comments), batch_size):
        batch = comments[start:start + batch_size]
        # Keys are the 1-based global comment numbers, as strings, matching
        # the numbering used in the prompt text below.
        numbered = {
            str(start + offset): comment
            for offset, comment in enumerate(batch, 1)
        }
        text = "\n".join(
            "{}.{}".format(number, comment)
            for number, comment in numbered.items()
        )
        res = LLMChain_test_many_output(text, len(batch))

        json_data = json.loads(_extract_json_payload(res))
        if not isinstance(json_data, dict):
            raise ValueError(
                "expected a JSON object from the LLM, got {}".format(
                    type(json_data).__name__
                )
            )

        # Remove the aggregate entries so only per-comment entries remain.
        batch_summary = json_data.pop("summary", None)
        batch_counts = json_data.pop("count", None)
        if batch_summary is not None:
            summaries.append(str(batch_summary))
        if isinstance(batch_counts, dict):
            result_count.append(batch_counts)

        for key, value in json_data.items():
            if isinstance(value, dict):
                value['comment'] = numbered.get(key, '')
        data.append(json_data)

    total_positive = sum(item.get("正面", 0) for item in result_count)
    total_negative = sum(item.get("负面", 0) for item in result_count)
    total_indifference = sum(item.get("无关", 0) for item in result_count)
    total = total_positive + total_negative + total_indifference

    summary = summary_outputs(";".join(summaries), result_count)
    return {
        "data": data,
        "positive": _percentage(total_positive, total),
        "negative": _percentage(total_negative, total),
        "neutral": _percentage(total_indifference, total),
        "summary": summary,
    }





if __name__ == "__main__":
    # Load variables from a .env file into the environment before any LLM
    # call is made (presumably the credentials used by `model` -- confirm).
    load_dotenv()

    # Input locations for a local run.
    excel_file = "test.xlsx"
    csv_file = "example.csv"

    # Optional one-off conversion step, left disabled:
    # excel_2_csv(excel_file, csv_file)

    # Run the full pipeline over every comment in the CSV; the result is
    # not used further here.
    multiple_comment_outputs(csv_file)