# savers/app.py — Hugging Face Space by fudii0921 ("Update app.py", commit 3019d6a, verified)
import os
import fitz # PyMuPDF
import gradio as gr
import autogen
from autogen.agentchat.contrib.capabilities import transform_messages, transforms
from autogen.agentchat.contrib.capabilities.text_compressors import LLMLingua
from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor
import mysql.connector
import copy
import pprint
import re
from typing import Dict, List, Tuple
# --- Fetch the Groq API key from the project's MySQL key store ---
# NOTE(security review): host, user, and password are hard-coded here;
# move them to environment variables / Space secrets before deploying.
conn = mysql.connector.connect(
    host="www.ryhintl.com",
    user="smairuser",
    password="smairuser",
    port=36000,
    database="smair"
)
# dictionary=True makes each fetched row a dict keyed by column name.
cursor = conn.cursor(dictionary=True)
try:
    # Load every stored API key row.
    cursor.execute("SELECT * FROM agentic_apis")
    result = cursor.fetchall()
finally:
    # Close the cursor and connection now that the rows are cached in
    # memory; the original left both open for the whole process lifetime.
    cursor.close()
    conn.close()

# Collect the 'key' column of every row; the third entry is the Groq key
# used by the agents below (presumably by project convention — TODO confirm).
keys = [item['key'] for item in result]
os.environ["GROQ_API_KEY"] = keys[2]
def extract_text_from_pdf(file):
    """Return the concatenated plain text of every page of a PDF.

    Parameters
    ----------
    file : str
        PDF filename, resolved relative to the app's working directory.

    Returns
    -------
    str
        All page text in document order.
    """
    # os.path.join is safer than the original manual "./" + file concatenation.
    path = os.path.join(".", file)
    with fitz.open(path) as doc:
        # str.join avoids quadratic += concatenation on large documents.
        return "".join(page.get_text() for page in doc)
def summarize_pdf(file):
    """Summarize a local PDF in Japanese via a two-agent autogen chat.

    Parameters
    ----------
    file : str
        PDF filename passed through to ``extract_text_from_pdf``.

    Returns
    -------
    str
        The researcher agent's first reply (the Japanese summary).
    """
    pdf_text = extract_text_from_pdf(file)

    # "You are a world-class researcher." (system prompt, in Japanese)
    system_message = "貴方は世界的なレベルのリサーチャーです。"

    # Groq-hosted Llama model; the key was loaded from MySQL at import time.
    config_list = [{
        "model": "llama-3.3-70b-versatile",
        "api_key": os.environ.get("GROQ_API_KEY"),
        "api_type": "groq"
    }]

    # Agent that produces the summary; auto-replies once, no human input.
    researcher = autogen.ConversableAgent(
        "assistant",
        llm_config={"config_list": config_list},
        max_consecutive_auto_reply=1,
        system_message=system_message,
        human_input_mode="NEVER",
    )

    # Proxy that initiates the chat on the user's behalf.
    user_proxy = autogen.UserProxyAgent(
        "user_proxy",
        human_input_mode="NEVER",
        is_termination_msg=lambda x: "TERMINATE" in x.get("content", ""),
        max_consecutive_auto_reply=1,
        code_execution_config={"work_dir": "coding", "use_docker": False}
    )

    # Keep the researcher's context within model limits: at most 10 recent
    # messages and a hard token budget per message / per history.
    # (The original also built an LLMLingua TextMessageCompressor and two
    # extra transforms whose outputs were never used; that dead work — an
    # expensive compression pass with a discarded result — is removed here.)
    context_handling = transform_messages.TransformMessages(
        transforms=[
            transforms.MessageHistoryLimiter(max_messages=10),
            transforms.MessageTokenLimiter(max_tokens=6000, max_tokens_per_message=2000, min_tokens=500),
        ]
    )
    context_handling.add_to_agent(researcher)

    # "Summarize this document in Japanese, include the important
    # information, and also show the number of tokens saved."
    message = "この資料を日本語で要約し、重要な情報を含めてください。節約されたトークン数も表示してください。" + pdf_text
    result = user_proxy.initiate_chat(recipient=researcher, clear_history=True, message=message, silent=True)

    # chat_history[1] is the researcher's reply to the initial user message.
    return result.chat_history[1]["content"]
# --- Gradio UI: pick one of the bundled PDFs and show its Japanese summary ---
PDF_CHOICES = ["yoin.pdf", "spo_revenue.pdf", "lings.pdf", "korea-ai.pdf"]

# Dropdown of the PDFs shipped with the Space ("PDFを選択" = "Select a PDF").
pdf_selector = gr.Dropdown(
    choices=PDF_CHOICES,
    label="PDFを選択"
)

iface = gr.Interface(
    fn=summarize_pdf,
    inputs=pdf_selector,
    outputs="text",
    title="Research Paper Summarizer",
    description="Select a PDF and get a summary in Japanese."
)

iface.launch()