Spaces:
Runtime error
Runtime error
binliu
committed on
Commit
·
eb72193
1
Parent(s):
fc74cd7
multiple Comment Output
Browse files- example.csv +0 -0
- main.py +24 -0
- model.py +80 -0
- multipleCommentOutputs.py +59 -0
- requirements.txt +12 -0
- template.py +18 -0
- test.txt +31 -0
- test.xlsx +0 -0
example.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
main.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv
|
| 2 |
+
|
| 3 |
+
from multipleCommentOutputs import multiple_comment_outputs
|
| 4 |
+
|
| 5 |
+
if __name__ == '__main__':
    # Read settings from a .env file before anything talks to the model.
    load_dotenv()

    # Single-comment mode (disabled):
    # context = '中杯电子券也用不了'
    # print(LLMChain_test_single(context))

    # Multi-comment mode: run the batch pipeline on the example CSV, passing
    # 20 as the row limit, and print the structured result.
    csv_file = 'example.csv'
    report = multiple_comment_outputs(csv_file, 20)
    print(report)

    # Refine-chain variant (disabled):
    # csv_file = 'example.csv'
    # refine_multiple_comment_outputs(csv_file)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
model.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.chains.combine_documents.refine import RefineDocumentsChain
|
| 2 |
+
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
| 3 |
+
|
| 4 |
+
from template import template, template_many, template_many_out_put
|
| 5 |
+
from langchain import LLMChain
|
| 6 |
+
from langchain.llms import OpenAI, AzureOpenAI
|
| 7 |
+
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
|
| 8 |
+
from langchain.prompts import PromptTemplate
|
| 9 |
+
|
| 10 |
+
def LLMChain_test_outputs(context, result_count):
    """Fill ``template`` with ``context`` and return the model's completion.

    Args:
        context: Text substituted for the ``{context}`` placeholder.
        result_count: Accepted so callers can pass their per-batch counts,
            but not used by this function.

    Returns:
        The value produced by ``LLMChain.run`` for the filled prompt.
    """
    summary_prompt = PromptTemplate(template=template, input_variables=['context'])
    completion_model = OpenAI(
        model_name="text-davinci-003",
        engine="text-davinci-003",
        temperature=0,
    )
    summary_chain = LLMChain(llm=completion_model, prompt=summary_prompt)
    chain_inputs = {'context': context}
    return summary_chain.run(chain_inputs)
|
| 20 |
+
|
| 21 |
+
def LLMChain_test(context):
    """Run ``template`` over a single piece of text and return the completion.

    The sentiment labels (``正面`` / ``负面`` / ``无关``) are offered to the
    prompt, but only the placeholders that ``template`` actually contains are
    declared and passed on.

    Args:
        context: Text substituted for the ``{context}`` placeholder.

    Returns:
        The value produced by ``LLMChain.run`` for the filled prompt.
    """
    from string import Formatter

    candidate_values = {
        'context': context,
        'positive': '正面',
        'negative': '负面',
        'indifference': '无关',
    }
    # Declaring variables the template does not reference makes the prompt
    # schema inconsistent; PromptTemplate's strict formatter rejects unused
    # arguments.  NOTE(review): confirm against the installed langchain
    # version, requirements.txt does not pin one.
    referenced = {
        field for _, field, _, _ in Formatter().parse(template) if field
    }
    chain_inputs = {
        name: value
        for name, value in candidate_values.items()
        if name in referenced
    }

    llm = OpenAI(model_name="text-davinci-003", engine="text-davinci-003", temperature=0)
    prompt = PromptTemplate(template=template, input_variables=list(chain_inputs))
    chain = LLMChain(llm=llm, prompt=prompt)
    return chain.run(chain_inputs)
|
| 33 |
+
|
| 34 |
+
def LLMChain_test_many(context, count):
    """Ask the model to label and summarise ``count`` sentences in free text.

    Args:
        context: The sentences, substituted for ``{context}`` in
            ``template_many``.
        count: Number of sentences, substituted for ``{count}``.

    Returns:
        The value produced by ``LLMChain.run`` for the filled prompt.
    """
    sentiment_labels = {
        'positive': '正面',
        'negative': '负面',
        'indifference': '无关',
    }
    many_prompt = PromptTemplate(
        template=template_many,
        input_variables=['count', 'context', 'positive', 'negative', 'indifference'],
    )
    completion_model = OpenAI(
        model_name="text-davinci-003",
        engine="text-davinci-003",
        temperature=0,
        max_tokens=3000,
        verbose=True,
    )
    many_chain = LLMChain(llm=completion_model, prompt=many_prompt)
    return many_chain.run({'count': count, 'context': context, **sentiment_labels})
|
| 48 |
+
|
| 49 |
+
def LLMChain_test_many_output(context, count):
    """Ask the model for a structured verdict on ``count`` sentences.

    The prompt is ``template_many_out_put`` with the format instructions of a
    ``StructuredOutputParser`` appended through ``{format_instructions}``.

    Args:
        context: The sentences, substituted for ``{context}``.
        count: Number of sentences, substituted for ``{count}``.

    Returns:
        The raw completion returned by the model.  The parser is used only to
        generate the format instructions; the reply is not parsed here.
    """
    # (field name, description sent to the model) for every expected key.
    schema_fields = (
        ("index", "句子的序号作为数组的下标"),
        ("emotion", "每个句子的情绪,枚举值:正面、负面、无关"),
        ("description", "对每个句子的总结"),
        ("count", "单独key整体的正面、负面、无关的条数,并且只对整体"),
        ("summary", "给出的所有句子整体的总结"),
    )
    response_schemas = [
        ResponseSchema(name=field, description=meaning)
        for field, meaning in schema_fields
    ]

    # Build the parser solely to obtain its format instructions.
    output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
    structured_prompt = PromptTemplate(
        template=template_many_out_put,
        input_variables=['count', 'context', 'positive', 'negative', 'indifference'],
        partial_variables={"format_instructions": output_parser.get_format_instructions()},
    )

    completion_model = OpenAI(
        model_name="text-davinci-003",
        engine="text-davinci-003",
        temperature=0,
        max_tokens=3000,
        verbose=True,
    )
    rendered_prompt = structured_prompt.format(
        count=count,
        context=context,
        positive='正面',
        negative='负面',
        indifference='无关',
    )
    # output_parser.parse(...) is deliberately not applied to the reply.
    return completion_model(rendered_prompt)
|
| 79 |
+
|
| 80 |
+
|
multipleCommentOutputs.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from model import LLMChain_test_many_output, LLMChain_test_outputs
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def excel_2_csv(excel_file, csv_file):
    """Read ``excel_file`` with pandas and write it to ``csv_file``.

    The DataFrame index is not written to the CSV.
    """
    pd.read_excel(excel_file).to_csv(csv_file, index=False)
|
| 11 |
+
|
| 12 |
+
def summary_outputs(context, result_count):
    """Return the overall summary produced by ``LLMChain_test_outputs``.

    Both arguments are forwarded unchanged.
    """
    overall_summary = LLMChain_test_outputs(context, result_count)
    return overall_summary
|
| 14 |
+
|
| 15 |
+
def multiple_comment_outputs(csv_file, batch_count=None):
    """Classify and summarise the comments stored in a CSV file.

    The ``评论内容`` column is cut into batches of ten comments.  Each batch is
    numbered, sent to ``LLMChain_test_many_output`` and the JSON object in the
    reply is collected.  The per-batch counts are added up into percentages
    and the per-batch summaries are condensed by ``summary_outputs``.

    Args:
        csv_file: Path of a CSV file containing a ``评论内容`` column.
        batch_count: Maximum number of comments to process, counted from the
            first row.  ``None`` (the default) processes every row.

    Returns:
        A dict with the keys ``data`` (the parsed JSON of every batch),
        ``positive``, ``negative``, ``neutral`` (percentages as floats) and
        ``summary`` (the overall summary, ``""`` when no batch produced one).

    Raises:
        KeyError: If the CSV has no ``评论内容`` column.
        json.JSONDecodeError: If a model reply contains no parseable JSON.
    """
    batch_size = 10
    comments = pd.read_csv(csv_file)['评论内容']
    if batch_count is not None:
        # Apply the limit once, up front.  Limiting only the loop range would
        # let the last batch run past it (a limit of 15 would still send rows
        # 10-19 to the model).
        comments = comments.iloc[:batch_count]

    data = []
    summaries = []
    result_count = []
    for start in range(0, len(comments), batch_size):
        batch = comments.iloc[start:start + batch_size]
        # Number the comments continuously across batches: "1.xxx", "2.yyy", ...
        text = "\n".join(
            "{}.{}".format(start + offset + 1, value)
            for offset, value in enumerate(batch)
        )
        json_data = _parse_fenced_json(LLMChain_test_many_output(text, len(batch)))

        batch_summary = json_data.get("summary")
        if batch_summary:
            summaries.append(str(batch_summary))
        counts = json_data.get("count")
        # A reply without a usable "count" object contributes nothing.
        result_count.append(counts if isinstance(counts, dict) else {})
        data.append(json_data)

    total_positive = sum(item.get("正面") or 0 for item in result_count)
    total_negative = sum(item.get("负面") or 0 for item in result_count)
    total_indifference = sum(item.get("无关") or 0 for item in result_count)
    total = total_positive + total_negative + total_indifference

    # Skip the extra model call when there is nothing to summarise.
    summary = summary_outputs(";".join(summaries), result_count) if summaries else ""
    return {
        "data": data,
        "positive": _percentage(total_positive, total),
        "negative": _percentage(total_negative, total),
        "neutral": _percentage(total_indifference, total),
        "summary": summary,
    }


def _parse_fenced_json(raw):
    """Parse the JSON inside the first ```json fence of ``raw``.

    Falls back to parsing ``raw`` as a whole when it carries no fence.
    """
    _, fence, tail = raw.partition("```json")
    payload = tail.partition("```")[0] if fence else raw
    return json.loads(payload.strip())


def _percentage(part, total):
    """Return ``part`` as a whole-percent share of ``total`` (0.0 if empty)."""
    if not total:
        return 0.0
    # The outer round removes float artefacts such as 56.99999999999999.
    return round(round(part / total, 2) * 100, 2)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
if __name__ == '__main__':
    excel_file = 'test.xlsx'
    csv_file = 'example.csv'
    # One-off conversion of the spreadsheet into the CSV (disabled):
    # excel_2_csv(excel_file, csv_file)

    load_dotenv()
    # The row limit must be passed explicitly: calling with the path alone
    # raises TypeError.  20 matches the limit used in main.py.
    print(multiple_comment_outputs(csv_file, 20))
|
| 58 |
+
|
| 59 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
transformers
|
| 3 |
+
torch
|
| 4 |
+
PyPDF2
|
| 5 |
+
langchain
|
| 6 |
+
openai
|
| 7 |
+
tiktoken
|
| 8 |
+
faiss-cpu
|
| 9 |
+
spacy
|
| 10 |
+
pinecone-client
|
| 11 |
+
pypdf
|
| 12 |
+
python-dotenv
|
template.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Prompt for the final roll-up summary.  Placeholder: {context}.
template = '''Provide a stunning and constructive summary based on the following text
{context},
'''

# Free-text prompt (Chinese) asking for a label and summary per sentence.
# Placeholders: {count}, {context}, {positive}, {negative}, {indifference}.
template_many = '''给出下面一段上下文中,对以下{count}句子分别给出总结:
{context}
请用“{positive}”或“{negative}”或“{indifference}”回答,并给出总结
最后对这{count}条进行统计“{positive}”和“{negative}”和“{indifference}”的百分占比
'''

# Structured-output variant of the prompt above.  In addition to the same
# placeholders it ends with {format_instructions}, which is meant to be
# filled with the output parser's formatting rules.
template_many_out_put = '''Given the following context, provide a summary for each of the {count} sentences:
{context}
Please answer with "{positive}" or "{negative}" or "{indifference}", and provide a summary.
Finally, calculate the count of "{positive}", "{negative}", and "{indifference}" among these {count} sentences.
{format_instructions}
'''
|
| 17 |
+
|
| 18 |
+
|
test.txt
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
既占用冰摇杯又占用blender,严重影响出饮速度,有考虑过一线实操的可行性吗?为什么使用相同配方,不以冰杯作为参照物?有考虑过景区门店的可操作性吗?同时,新品相较于其他品牌饮品有很大竞争力吗?
|
| 2 |
+
回复 @Rachel Zhou(周婉盈) :伙伴你好,早餐门店可以加购的
|
| 3 |
+
你好,我在咖快点了一杯生啡做好了吗[微笑]
|
| 4 |
+
回复 @Zoe Liang(梁晶) :星冰爽
|
| 5 |
+
还是逃不过星冰乐呗?生咖细腻版?
|
| 6 |
+
这是个什么鬼东西。
|
| 7 |
+
我喝出了AD钙奶的味道
|
| 8 |
+
设计部混吃的啊
|
| 9 |
+
回复@Olivia PENG(彭潇涵) :就是嘞,吧物料放进搅拌机就Ok.
|
| 10 |
+
看着像粉粉生咖(冰沙版)
|
| 11 |
+
1
|
| 12 |
+
怎么没有加云的版本?奶盖可能更好喝点。
|
| 13 |
+
回复 @Benson Liu(刘彬彬) :饮品名 打成了 饮品颜名 多了一个颜字
|
| 14 |
+
App上为啥没办法定制加糖浆啊?只能选择少冰
|
| 15 |
+
中杯电子券也用不了
|
| 16 |
+
为啥用不了买一赠一
|
| 17 |
+
回复 @Rachel Zhou(周婉盈) :啡快可以,刚刚用电脑尝试不行。
|
| 18 |
+
为啥伙伴券用不了!
|
| 19 |
+
请问大家的pinkdrink能加早餐套餐吗
|
| 20 |
+
为啥我喝粉粉生咖 我喝出了 儿童版急支糖浆那个味道
|
| 21 |
+
回复@Vincent Hou(侯竑宇) :天然的颜色呈现错了?
|
| 22 |
+
粉粉生咖~萨瓦里卡~
|
| 23 |
+
回复 @Bobby ZHANG(张禹涵) :听君一席话如听一席话
|
| 24 |
+
生咖和低因豆拿铁相比,等量杯型谁更低?
|
| 25 |
+
海报有错别字“生咖系列饮品颜名以天然呈色” 你们检查一下
|
| 26 |
+
如果不难喝的话 还挺好喝的
|
| 27 |
+
现在LTO都降价了,不再是38/41/44了
|
| 28 |
+
回复 @Gofree Li(李迪) :lobby的很不错
|
| 29 |
+
文案可以不用那么浮夸吗
|
| 30 |
+
回复@Alan Li(李超) :昨天就有…
|
| 31 |
+
有点像蜜雪冰城
|
test.xlsx
ADDED
|
Binary file (158 kB). View file
|
|
|