binliu committed on
Commit
eb72193
·
1 Parent(s): fc74cd7

multiple Comment Output

Browse files
Files changed (8) hide show
  1. example.csv +0 -0
  2. main.py +24 -0
  3. model.py +80 -0
  4. multipleCommentOutputs.py +59 -0
  5. requirements.txt +12 -0
  6. template.py +18 -0
  7. test.txt +31 -0
  8. test.xlsx +0 -0
example.csv ADDED
The diff for this file is too large to render. See raw diff
 
main.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+
3
+ from multipleCommentOutputs import multiple_comment_outputs
4
+
5
if __name__ == '__main__':
    # Read settings from a local .env file before the pipeline runs.
    load_dotenv()

    # Single-comment mode (disabled):
    # context = '中杯电子券也用不了'
    # print(LLMChain_test_single(context))

    # Multi-comment formatted output, called with batch_count=20.
    comments_csv = 'example.csv'
    report = multiple_comment_outputs(comments_csv, 20)
    print(report)

    # Refine-based variant (disabled):
    # csv_file = 'example.csv'
    # refine_multiple_comment_outputs(csv_file)
19
+
20
+
21
+
22
+
23
+
24
+
model.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chains.combine_documents.refine import RefineDocumentsChain
2
+ from langchain.chains.qa_with_sources import load_qa_with_sources_chain
3
+
4
+ from template import template, template_many, template_many_out_put
5
+ from langchain import LLMChain
6
+ from langchain.llms import OpenAI, AzureOpenAI
7
+ from langchain.output_parsers import StructuredOutputParser, ResponseSchema
8
+ from langchain.prompts import PromptTemplate
9
+
10
def LLMChain_test_outputs(context, result_count):
    """Run the summary prompt over ``context`` and return the chain output.

    A ``text-davinci-003`` completion model is created with temperature 0,
    the module-level ``template`` is wrapped in a prompt whose only declared
    input variable is ``context``, and the two are combined in an
    ``LLMChain``.

    Args:
        context: Text substituted for ``{context}`` in the prompt.
        result_count: Accepted so existing callers keep working; this
            function does not read it.

    Returns:
        Whatever ``LLMChain.run`` returns for the filled-in prompt.
    """
    completion_model = OpenAI(
        model_name="text-davinci-003",
        engine="text-davinci-003",
        temperature=0,
    )
    summary_prompt = PromptTemplate(
        template=template,
        input_variables=['context'],
    )
    summary_chain = LLMChain(llm=completion_model, prompt=summary_prompt)
    prompt_inputs = {'context': context}
    return summary_chain.run(prompt_inputs)
20
+
21
def LLMChain_test(context):
    """Run ``template`` over one piece of text with the three sentiment labels.

    The prompt is declared with the variables ``context``, ``positive``,
    ``negative`` and ``indifference``; the labels are always filled with the
    fixed Chinese strings used below.

    NOTE(review): the ``template`` string visible in template.py only contains
    a ``{context}`` placeholder, so the three label variables look unused by
    it -- confirm which template this function is meant to use.

    Args:
        context: Text substituted for ``{context}`` in the prompt.

    Returns:
        Whatever ``LLMChain.run`` returns for the filled-in prompt.
    """
    completion_model = OpenAI(
        model_name="text-davinci-003",
        engine="text-davinci-003",
        temperature=0,
    )
    declared_variables = ['context', 'positive', 'negative', 'indifference']
    sentiment_prompt = PromptTemplate(
        template=template,
        input_variables=declared_variables,
    )
    sentiment_chain = LLMChain(llm=completion_model, prompt=sentiment_prompt)
    prompt_inputs = dict(
        context=context,
        positive='正面',
        negative='负面',
        indifference='无关',
    )
    return sentiment_chain.run(prompt_inputs)
33
+
34
def LLMChain_test_many(context, count):
    """Run the multi-sentence prompt ``template_many`` and return the output.

    Uses a ``text-davinci-003`` completion model (temperature 0, up to 3000
    completion tokens, verbose mode on) inside an ``LLMChain``.

    Args:
        context: The sentences to analyse, substituted for ``{context}``.
        count: Value substituted for ``{count}`` in the prompt.

    Returns:
        Whatever ``LLMChain.run`` returns for the filled-in prompt.
    """
    completion_model = OpenAI(
        model_name="text-davinci-003",
        engine="text-davinci-003",
        temperature=0,
        max_tokens=3000,
        verbose=True,
    )
    declared_variables = ['count', 'context', 'positive', 'negative', 'indifference']
    batch_prompt = PromptTemplate(
        template=template_many,
        input_variables=declared_variables,
    )
    batch_chain = LLMChain(llm=completion_model, prompt=batch_prompt)
    prompt_inputs = dict(
        count=count,
        context=context,
        positive='正面',
        negative='负面',
        indifference='无关',
    )
    return batch_chain.run(prompt_inputs)
48
+
49
def LLMChain_test_many_output(context, count):
    """Ask the model for a structured analysis of several sentences.

    Five response fields (index, emotion, description, count, summary) are
    described to a ``StructuredOutputParser``; its format instructions are
    injected into ``template_many_out_put`` as ``format_instructions``. The
    rendered prompt is sent straight to the completion model.

    Args:
        context: The sentences to analyse, substituted for ``{context}``.
        count: Value substituted for ``{count}`` in the prompt.

    Returns:
        The raw value returned by calling the model with the rendered prompt.
        It is not passed through the parser here.
    """
    completion_model = OpenAI(
        model_name="text-davinci-003",
        engine="text-davinci-003",
        temperature=0,
        max_tokens=3000,
        verbose=True,
    )

    # (field name, description shown to the model) for each requested field.
    field_specs = (
        ("index", "句子的序号作为数组的下标"),
        ("emotion", "每个句子的情绪,枚举值:正面、负面、无关"),
        ("description", "对每个句子的总结"),
        ("count", "单独key整体的正面、负面、无关的条数,并且只对整体"),
        ("summary", "给出的所有句子整体的总结"),
    )
    schemas = [
        ResponseSchema(name=field_name, description=field_description)
        for field_name, field_description in field_specs
    ]

    # Initialise the parser; only its format instructions are used here.
    parser = StructuredOutputParser.from_response_schemas(schemas)
    instructions = parser.get_format_instructions()

    structured_prompt = PromptTemplate(
        template=template_many_out_put,
        input_variables=['count', 'context', 'positive', 'negative', 'indifference'],
        partial_variables={"format_instructions": instructions},
    )
    rendered_prompt = structured_prompt.format(
        count=count,
        context=context,
        positive='正面',
        negative='负面',
        indifference='无关',
    )
    return completion_model(rendered_prompt)
79
+
80
+
multipleCommentOutputs.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ import pandas as pd
4
+ from dotenv import load_dotenv
5
+ from model import LLMChain_test_many_output, LLMChain_test_outputs
6
+
7
+
8
def excel_2_csv(excel_file, csv_file):
    """Convert an Excel workbook to a CSV file.

    Args:
        excel_file: Path of the workbook, read with ``pandas.read_excel``
            using its default options.
        csv_file: Path of the CSV file to write; the DataFrame index is not
            written.
    """
    pd.read_excel(excel_file).to_csv(csv_file, index=False)
11
+
12
def summary_outputs(context, result_count):
    """Produce the overall summary by delegating to ``LLMChain_test_outputs``.

    Args:
        context: Text to summarise.
        result_count: Forwarded unchanged to ``LLMChain_test_outputs``.

    Returns:
        The value returned by ``LLMChain_test_outputs``.
    """
    overall_summary = LLMChain_test_outputs(context, result_count)
    return overall_summary
14
+
15
def _extract_json_payload(raw_output):
    """Decode the JSON document embedded in a model reply."""
    _, fence, remainder = raw_output.partition("```json")
    if fence:
        # Keep only the text between the opening fence and the next fence so
        # that anything the model writes after the block is ignored.
        payload = remainder.partition("```")[0]
    else:
        # No fenced section: assume the whole reply is the JSON document.
        payload = raw_output
    return json.loads(payload.strip().strip("`").strip())


def _as_count(counts, label):
    """Read ``label`` from one batch's count mapping as an int (0 if absent)."""
    if not isinstance(counts, dict):
        return 0
    try:
        return int(counts.get(label) or 0)
    except (TypeError, ValueError):
        return 0


def _percentage(part, whole):
    """Return ``part`` as a whole-number percentage of ``whole`` (0.0 if empty)."""
    if not whole:
        return 0.0
    # Scale before rounding; rounding the ratio first and multiplying by 100
    # afterwards yields values such as 28.999999999999996.
    return float(round(100 * part / whole))


def multiple_comment_outputs(csv_file, batch_count):
    """Analyse the first ``batch_count`` comments of a CSV file in batches.

    The comments are taken from the ``评论内容`` column, numbered from 1 and
    sent to ``LLMChain_test_many_output`` ten at a time. Each reply is decoded
    as JSON; the per-batch ``count`` objects are added up into sentiment
    percentages and the per-batch ``summary`` texts are joined with ``;`` and
    passed to ``summary_outputs`` for an overall summary.

    Args:
        csv_file: Path of the CSV file; it must contain a ``评论内容`` column.
        batch_count: Maximum number of comments (from the top of the file) to
            process.

    Returns:
        A dict with the keys ``data`` (list of decoded per-batch replies),
        ``positive``, ``negative``, ``neutral`` (whole-number percentages as
        floats, all 0.0 when nothing was counted) and ``summary`` (the overall
        summary, or ``""`` when no comment was processed).

    Raises:
        KeyError: If the ``评论内容`` column is missing.
        json.JSONDecodeError: If a model reply does not contain valid JSON.
    """
    batch_size = 10
    # Limit the rows first so the batches never reach past ``batch_count``,
    # even when it is not a multiple of the batch size.
    comments = pd.read_csv(csv_file)['评论内容'].iloc[:batch_count]

    data = []
    summaries = []
    batch_counts = []
    for start in range(0, len(comments), batch_size):
        batch = comments.iloc[start:start + batch_size]
        numbered = "\n".join(
            "{}.{}".format(start + offset + 1, value)
            for offset, value in enumerate(batch)
        )
        reply = LLMChain_test_many_output(numbered, len(batch))
        parsed = _extract_json_payload(reply)
        summaries.append(parsed.get("summary"))
        batch_counts.append(parsed.get("count"))
        data.append(parsed)

    total_positive = sum(_as_count(item, "正面") for item in batch_counts)
    total_negative = sum(_as_count(item, "负面") for item in batch_counts)
    total_indifference = sum(_as_count(item, "无关") for item in batch_counts)
    total = total_positive + total_negative + total_indifference

    if data:
        # Skip batches whose reply carried no summary instead of failing in join().
        joined = ";".join(str(text) for text in summaries if text)
        summary = summary_outputs(joined, batch_counts)
    else:
        # Nothing was analysed, so there is nothing to summarise.
        summary = ""

    return {
        "data": data,
        "positive": _percentage(total_positive, total),
        "negative": _percentage(total_negative, total),
        "neutral": _percentage(total_indifference, total),
        "summary": summary,
    }
46
+
47
+
48
+
49
+
50
+
51
if __name__ == '__main__':
    excel_file = 'test.xlsx'
    csv_file = 'example.csv'
    # Optional one-off conversion of the workbook to CSV:
    # excel_2_csv(excel_file, csv_file)

    load_dotenv()
    # multiple_comment_outputs requires a batch_count argument; 20 is the
    # value main.py passes. Print the result so the run has visible output.
    print(multiple_comment_outputs(csv_file, 20))
58
+
59
+
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ torch
4
+ PyPDF2
5
+ langchain
6
+ openai
7
+ tiktoken
8
+ faiss-cpu
9
+ spacy
10
+ pinecone-client
11
+ pypdf
12
+ python-dotenv
template.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt asking for a free-form summary of a text.
# Placeholder: {context}.
template = '''Provide a stunning and constructive summary based on the following text
{context},
'''

# Chinese prompt asking for a per-sentence summary and sentiment label,
# followed by the percentage share of each label.
# Placeholders: {count}, {context}, {positive}, {negative}, {indifference}.
template_many = '''给出下面一段上下文中,对以下{count}句子分别给出总结:
{context}
请用“{positive}”或“{negative}”或“{indifference}”回答,并给出总结
最后对这{count}条进行统计“{positive}”和“{negative}”和“{indifference}”的百分占比
'''

# English prompt asking for a per-sentence summary and sentiment label,
# followed by the count of each label, ending with output-format instructions.
# Placeholders: {count}, {context}, {positive}, {negative}, {indifference},
# {format_instructions}.
template_many_out_put = '''Given the following context, provide a summary for each of the {count} sentences:
{context}
Please answer with "{positive}" or "{negative}" or "{indifference}", and provide a summary.
Finally, calculate the count of "{positive}", "{negative}", and "{indifference}" among these {count} sentences.
{format_instructions}
'''
17
+
18
+
test.txt ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 既占用冰摇杯又占用blender,严重影响出饮速度,有考虑过一线实操的可行性吗?为什么使用相同配方,不以冰杯作为参照物?有考虑过景区门店的可操作性吗?同时,新品相较于其他品牌饮品有很大竞争力吗?
2
+ 回复 @Rachel Zhou(周婉盈) :伙伴你好,早餐门店可以加购的
3
+ 你好,我在咖快点了一杯生啡做好了吗[微笑]
4
+ 回复 @Zoe Liang(梁晶) :星冰爽
5
+ 还是逃不过星冰乐呗?生咖细腻版?
6
+ 这是个什么鬼东西。
7
+ 我喝出了AD钙奶的味道
8
+ 设计部混吃的啊
9
+ 回复@Olivia PENG(彭潇涵) :就是嘞,吧物料放进搅拌机就Ok.
10
+ 看着像粉粉生咖(冰沙版)
11
+ 1
12
+ 怎么没有加云的版本?奶盖可能更好喝点。
13
+ 回复 @Benson Liu(刘彬彬) :饮品名 打成了 饮品颜名 多了一个颜字
14
+ App上为啥没办法定制加糖浆啊?只能选择少冰
15
+ 中杯电子券也用不了
16
+ 为啥用不了买一赠一
17
+ 回复 @Rachel Zhou(周婉盈) :啡快可以,刚刚用电脑尝试不行。
18
+ 为啥伙伴券用不了!
19
+ 请问大家的pinkdrink能加早餐套餐吗
20
+ 为啥我喝粉粉生咖 我喝出了 儿童版急支糖浆那个味道
21
+ 回复@Vincent Hou(侯竑宇) :天然的颜色呈现错了?
22
+ 粉粉生咖~萨瓦里卡~
23
+ 回复 @Bobby ZHANG(张禹涵) :听君一席话如听一席话
24
+ 生咖和低因豆拿铁相比,等量杯型谁更低?
25
+ 海报有错别字“生咖系列饮品颜名以天然呈色” 你们检查一下
26
+ 如果不难喝的话 还挺好喝的
27
+ 现在LTO都降价了,不再是38/41/44了
28
+ 回复 @Gofree Li(李迪) :lobby的很不错
29
+ 文案可以不用那么浮夸吗
30
+ 回复@Alan Li(李超) :昨天就有…
31
+ 有点像蜜雪冰城
test.xlsx ADDED
Binary file (158 kB). View file