Spaces:

StarbucksCN
/

starbucks_summary

Runtime error

App Files Files Community

binliu committed on Jul 12, 2023

Commit

eb72193

1 Parent(s): fc74cd7

multiple Comment Output

Browse files

Files changed (8) hide show

example.csv +0 -0
main.py +24 -0
model.py +80 -0
multipleCommentOutputs.py +59 -0
requirements.txt +12 -0
template.py +18 -0
test.txt +31 -0
test.xlsx +0 -0

example.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

main.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from dotenv import load_dotenv
+from multipleCommentOutputs import multiple_comment_outputs
+if __name__ == '__main__':
+    load_dotenv()
+    # 单个评论
+    # context = '中杯电子券也用不了'
+    # print(LLMChain_test_single(context))
+    # 多评论格式化输出
+    csv_file = 'example.csv'
+    print(multiple_comment_outputs(csv_file,20))
+    # csv_file = 'example.csv'
+    # refine_multiple_comment_outputs(csv_file)

model.py ADDED Viewed

	@@ -0,0 +1,80 @@

+from langchain.chains.combine_documents.refine import RefineDocumentsChain
+from langchain.chains.qa_with_sources import load_qa_with_sources_chain
+from template import template, template_many, template_many_out_put
+from langchain import LLMChain
+from langchain.llms import OpenAI, AzureOpenAI
+from langchain.output_parsers import StructuredOutputParser, ResponseSchema
+from langchain.prompts import PromptTemplate
+def LLMChain_test_outputs(context, result_count):
+    llm = OpenAI(model_name="text-davinci-003", engine="text-davinci-003",temperature=0)
+    prompt = PromptTemplate(template=template, input_variables=['context'])
+    chain = LLMChain(llm=llm, prompt=prompt)
+    return chain.run(
+        {
+            'context': context,
+        }
+    )
+def LLMChain_test(context):
+    llm = OpenAI(model_name="text-davinci-003", engine="text-davinci-003",temperature=0)
+    prompt = PromptTemplate(template=template, input_variables=['context','positive', 'negative','indifference'])
+    chain = LLMChain(llm=llm, prompt=prompt)
+    return chain.run(
+        {
+            'context': context,
+            'positive': '正面',
+            'negative': '负面',
+            'indifference': '无关'
+        }
+    )
+def LLMChain_test_many(context,count):
+    llm = OpenAI(model_name="text-davinci-003", engine="text-davinci-003", temperature=0,max_tokens=3000,verbose=True)
+    prompt = PromptTemplate(template=template_many,
+                            input_variables=['count','context', 'positive', 'negative', 'indifference'],
+                            )
+    chain = LLMChain(llm=llm, prompt=prompt)
+    return chain.run(
+        {
+            'count': count,
+            'context': context,
+            'positive': '正面',
+            'negative': '负面',
+            'indifference': '无关'
+        })
+def LLMChain_test_many_output(context,count):
+    llm = OpenAI(model_name="text-davinci-003", engine="text-davinci-003", temperature=0,max_tokens=3000,verbose=True)
+    response_schemas = [
+        ResponseSchema(name="index", description="句子的序号作为数组的下标"),
+        ResponseSchema(name="emotion", description="每个句子的情绪，枚举值：正面、负面、无关"),
+        ResponseSchema(name="description", description="对每个句子的总结"),
+        ResponseSchema(name="count", description="单独key整体的正面、负面、无关的条数，并且只对整体"),
+        ResponseSchema(name="summary", description="给出的所有句子整体的总结")
+    ]
+    # 初始化解析器
+    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+    format_instructions = output_parser.get_format_instructions()
+    prompt = PromptTemplate(template=template_many_out_put,
+                            input_variables=['count','context', 'positive', 'negative', 'indifference'],
+                            partial_variables={"format_instructions": format_instructions},)
+    promptValue = prompt.format(**{
+            'count': count,
+            'context': context,
+            'positive': '正面',
+            'negative': '负面',
+            'indifference': '无关'
+        })
+    llm_output = llm(promptValue)
+    # return output_parser.parse(llm_output)
+    return llm_output

multipleCommentOutputs.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import json
+import pandas as pd
+from dotenv import load_dotenv
+from model import LLMChain_test_many_output, LLMChain_test_outputs
+def excel_2_csv(excel_file,csv_file):
+    df = pd.read_excel(excel_file)
+    df.to_csv(csv_file, index=False)
+def summary_outputs(context, result_count):
+    return LLMChain_test_outputs(context,result_count)
+def multiple_comment_outputs(csv_file,batch_count):
+    df = pd.read_csv(csv_file)
+    column = df['评论内容']
+    data = []
+    result = []
+    result_count = []
+    for i in range(0, len(column[0:batch_count]), 10):
+        batch = column[i:i + 10]
+        count = len(batch)
+        text = ("\n").join(["{}.{}".format(index+i+1,value) for index,value in enumerate(batch)])
+        res = LLMChain_test_many_output(text,count)
+        json_string = res.split("```json")[1].strip().strip("```").strip()
+        json_data = json.loads(json_string)
+        result.append(json_data.get("summary"))
+        result_count.append(json_data.get("count"))
+        data.append(json_data)
+    total_positive = sum(item["正面"] for item in result_count)
+    total_negative = sum(item["负面"] for item in result_count)
+    total_indifference = sum(item["无关"] for item in result_count)
+    total = total_positive+total_negative+total_indifference
+    positive = round(total_positive/total, 2)*100
+    negative = round(total_negative/total, 2)*100
+    neutral = round(total_indifference/total, 2)*100
+    summary = summary_outputs(";".join(result), result_count)
+    return {
+        "data": data,
+        "positive": positive,
+        "negative": negative,
+        "neutral": neutral,
+        "summary": summary,
+    }
+if __name__ == '__main__':
+    excel_file = 'test.xlsx'
+    csv_file = 'example.csv'
+    # excel_2_csv(excel_file, csv_file)
+    load_dotenv()
+    multiple_comment_outputs(csv_file)

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+streamlit
+transformers
+torch
+PyPDF2
+langchain
+openai
+tiktoken
+faiss-cpu
+spacy
+pinecone-client
+pypdf
+python-dotenv

template.py ADDED Viewed

	@@ -0,0 +1,18 @@

+# Prompt asking for a summary of the text substituted for {context}.
+# Placeholders: context.
+template = '''Provide a stunning and constructive summary based on the following text
+    {context},
+    '''
+# Chinese prompt: summarise each of the {count} sentences in {context}, answer
+# with one of the three labels, then report each label's percentage share.
+# Placeholders: count, context, positive, negative, indifference.
+template_many = '''给出下面一段上下文中,对以下{count}句子分别给出总结:
+    {context}
+    请用“{positive}”或“{negative}”或“{indifference}”回答,并给出总结
+    最后对这{count}条进行统计“{positive}”和“{negative}”和“{indifference}”的百分占比
+    '''
+# English prompt: same per-sentence task, but asks for label counts and appends
+# {format_instructions} so the reply follows a structured format.
+# Placeholders: count, context, positive, negative, indifference,
+# format_instructions.
+template_many_out_put = '''Given the following context, provide a summary for each of the {count} sentences:
+    {context}
+Please answer with "{positive}" or "{negative}" or "{indifference}", and provide a summary.
+Finally, calculate the count of "{positive}", "{negative}", and "{indifference}" among these {count} sentences.
+    {format_instructions}
+    '''

test.txt ADDED Viewed

	@@ -0,0 +1,31 @@

+既占用冰摇杯又占用blender，严重影响出饮速度，有考虑过一线实操的可行性吗？为什么使用相同配方，不以冰杯作为参照物？有考虑过景区门店的可操作性吗？同时，新品相较于其他品牌饮品有很大竞争力吗？
+回复 @Rachel Zhou(周婉盈) ：伙伴你好，早餐门店可以加购的
+你好，我在咖快点了一杯生啡做好了吗[微笑]
+回复 @Zoe Liang(梁晶) ：星冰爽
+还是逃不过星冰乐呗？生咖细腻版？
+这是个什么鬼东西。
+我喝出了AD钙奶的味道
+设计部混吃的啊
+回复@Olivia PENG(彭潇涵) ：就是嘞，吧物料放进搅拌机就Ok.
+看着像粉粉生咖（冰沙版）
+1
+怎么没有加云的版本？奶盖可能更好喝点。
+回复 @Benson Liu(刘彬彬) ：饮品名 打成了 饮品颜名 多了一个颜字
+App上为啥没办法定制加糖浆啊？只能选择少冰
+中杯电子券也用不了
+为啥用不了买一赠一
+回复 @Rachel Zhou(周婉盈) ：啡快可以，刚刚用电脑尝试不行。
+为啥伙伴券用不了！
+请问大家的pinkdrink能加早餐套餐吗
+为啥我喝粉粉生咖 我喝出了 儿童版急支糖浆那个味道
+回复@Vincent Hou(侯竑宇) ：天然的颜色呈现错了？
+粉粉生咖~萨瓦里卡~
+回复 @Bobby ZHANG(张禹涵) ：听君一席话如听一席话
+生咖和低因豆拿铁相比，等量杯型谁更低？
+海报有错别字“生咖系列饮品颜名以天然呈色” 你们检查一下
+如果不难喝的话 还挺好喝的
+现在LTO都降价了，不再是38/41/44了
+回复 @Gofree Li(李迪) ：lobby的很不错
+文案可以不用那么浮夸吗
+回复@Alan Li(李超) ：昨天就有…
+有点像蜜雪冰城

test.xlsx ADDED Viewed

Binary file (158 kB). View file