fudii0921 commited on
Commit
c2fa49d
·
verified ·
1 Parent(s): 22162f3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -0
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fitz # PyMuPDF
3
+ import gradio as gr
4
+ import autogen
5
+ from autogen.agentchat.contrib.capabilities import transform_messages, transforms
6
+ from autogen.agentchat.contrib.capabilities.text_compressors import LLMLingua
7
+ from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor
8
+ import mysql.connector
9
+
10
+ import copy
11
+ import pprint
12
+ import re
13
+ from typing import Dict, List, Tuple
14
+
15
+ # MySQLに接続
16
+ conn = mysql.connector.connect(
17
+ host="www.ryhintl.com",
18
+ user="smairuser",
19
+ password="smairuser",
20
+ port=36000,
21
+ database="smair"
22
+ )
23
+
24
+ # カーソルを取得
25
+ cursor = conn.cursor(dictionary=True)
26
+
27
+ # List API Keys
28
+ select_one_data_query = "SELECT * FROM agentic_apis"
29
+ cursor.execute(select_one_data_query)
30
+ result = cursor.fetchall()
31
+ # JSONをパースしてkeyを抽出
32
+ keys = [item['key'] for item in result]
33
+
34
+ os.environ["GROQ_API_KEY"] = keys[2]
35
+
36
def extract_text_from_pdf(file):
    """Extract the full plain text of a PDF located in the working directory.

    Parameters
    ----------
    file : str
        File name of the PDF, relative to the current working directory.

    Returns
    -------
    str
        The concatenated text of every page, in page order.
    """
    # The context manager guarantees the document handle is released even
    # if text extraction raises part-way through.
    with fitz.open("./" + file) as doc:
        # join() over a generator replaces the original quadratic
        # `text += page.get_text()` accumulation loop.
        return "".join(page.get_text() for page in doc)
43
+
44
def summarize_pdf(file):
    """Summarize a local PDF in Japanese via a two-agent autogen chat.

    The PDF text is compressed with LLMLingua, a researcher agent backed by
    a Groq-hosted Llama model is asked for a Japanese summary, and the
    number of tokens saved by compression is appended to the result.

    Parameters
    ----------
    file : str
        PDF file name in the working directory (passed to
        ``extract_text_from_pdf``).

    Returns
    -------
    str
        The model's summary followed by a Japanese sentence reporting the
        saved-token count.
    """
    pdf_text = extract_text_from_pdf(file)
    #pdf_text = extract_text_from_pdf(text)

    # System prompt (Japanese): "You are a world-class researcher."
    #system_message = "You are a world class researcher."
    system_message = "貴方は世界的なレベルのリサーチャーです。"
    # LLM endpoint configuration: Groq-hosted Llama 3.3 70B, key loaded at
    # module import time from the MySQL table.
    config_list = [{
        "model": "llama-3.3-70b-versatile",
        #"model": "llama-guard-3-8b",
        "api_key": os.environ.get("GROQ_API_KEY"),
        "api_type": "groq"
    }]

    # The answering agent; replies at most once per chat turn.
    researcher = autogen.ConversableAgent(
        "assistant",
        llm_config={"config_list": config_list},
        max_consecutive_auto_reply=1,
        system_message=system_message,
        human_input_mode="NEVER",
    )
    # The asking agent; terminates when the reply contains "TERMINATE".
    user_proxy = autogen.UserProxyAgent(
        "user_proxy",
        human_input_mode="NEVER",
        is_termination_msg=lambda x: "TERMINATE" in x.get("content", ""),
        max_consecutive_auto_reply=1,
        code_execution_config={"work_dir":"coding", "use_docker":False}
    )

    # Compress the raw PDF text so the prompt fits the model context.
    # target_token 5999 keeps the compressed prompt under the ~6k budget
    # used by the token limiter below.
    llm_lingua = LLMLingua()
    #text_compressor = TextMessageCompressor(text_compressor=llm_lingua,compression_params={"target_token": 13000},cache=None)
    text_compressor = TextMessageCompressor(text_compressor=llm_lingua,compression_params={"target_token": 5999},cache=None)
    # NOTE(review): compressed_text is never used afterwards — the chat
    # below sends the uncompressed pdf_text; this call appears to exist
    # only so get_logs() can report the savings. Confirm intent.
    compressed_text = text_compressor.apply_transform([{"content": pdf_text}])


    #content_list = [item['content'] for item in compressed_text]
    # Limit the message history to the 3 most recent messages
    max_msg_transfrom = transforms.MessageHistoryLimiter(max_messages=3)

    # Limit the token limit per message to 10 tokens
    # NOTE(review): neither of these two transforms is registered on an
    # agent; only the TransformMessages bundle below is actually applied.
    token_limit_transform = transforms.MessageTokenLimiter(max_tokens_per_message=3, min_tokens=10)

    '''messages = [
    {"role": "user", "content": "hello"},
    {"role": "assistant", "content": [{"type": "text", "text": "there"}]},
    {"role": "user", "content": "how"},
    {"role": "assistant", "content": [{"type": "text", "text": "are you doing?"}]},
    {"role": "user", "content": "very very very very very very long string"},
    ]

    processed_messages = max_msg_transfrom.apply_transform(copy.deepcopy(messages))
    pprint.pprint(processed_messages)

    processed_messages = token_limit_transform.apply_transform(copy.deepcopy(messages))

    pprint.pprint(processed_messages)'''


    # Cap the researcher's incoming context: last 10 messages, 6000 tokens
    # total, 2000 per message.
    context_handling = transform_messages.TransformMessages(
        transforms=[
            transforms.MessageHistoryLimiter(max_messages=10),
            transforms.MessageTokenLimiter(max_tokens=6000, max_tokens_per_message=2000, min_tokens=500),
            #transforms.MessageTokenLimiter(max_tokens=1000, max_tokens_per_message=50, min_tokens=500),
        ]
    )

    context_handling.add_to_agent(researcher)


    #context_handling = transform_messages.TransformMessages(transforms=[text_compressor])
    #context_handling.add_to_agent(researcher)

    # Prompt (Japanese): "Summarize this document in Japanese, include the
    # important information, and also show the number of tokens saved."
    #message = "Summarize this research paper for me in Japanese, include the important information" + pdf_text
    message = "この資料を日本語で要約し、重要な情報を含めてください。節約されたトークン数も表示してください。" + pdf_text
    result = user_proxy.initiate_chat(recipient=researcher, clear_history=True, message=message, silent=True)

    #print(text_compressor.get_logs([], []))
    # get_logs() returns a tuple; element 0 is a log string that is
    # presumably of the form "<N> tokens saved ..." — TODO confirm format.
    tresult = text_compressor.get_logs([], [])
    #print(tresult)

    # Extract the leading number from the log string.
    saved_tokens = str(int(tresult[0].split()[0]))
    #print(saved_tokens)

    # chat_history[1] is the researcher's first reply; the Japanese suffix
    # reads "<N> tokens were saved."
    return result.chat_history[1]["content"]+"\n\n"+saved_tokens+"トークンが節約できました。"
129
+
130
# --- Gradio UI ---------------------------------------------------------
# A dropdown of bundled sample PDFs feeds the summarizer; output is the
# Japanese summary text.
pdf_selector = gr.Dropdown(
    choices=["yoin.pdf", "spo_revenue.pdf", "lings.pdf", "korea-ai.pdf"],
    label="PDFを選択",
)

iface = gr.Interface(
    fn=summarize_pdf,
    #inputs=gr.inputs.File(label="Upload PDF"),
    inputs=pdf_selector,
    outputs="text",
    title="Research Paper Summarizer",
    description="Select a PDF and get a summary in Japanese.",
)

iface.launch()