import os
import fitz # PyMuPDF
import gradio as gr
import autogen
from autogen.agentchat.contrib.capabilities import transform_messages, transforms
from autogen.agentchat.contrib.capabilities.text_compressors import LLMLingua
from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor
import mysql.connector
import copy
import pprint
# Connect to MySQL
conn = mysql.connector.connect(
    host="www.ryhintl.com",
    user="smairuser",
    password="smairuser",
    port=36000,
    database="smair"
)
# Get a cursor that returns rows as dictionaries
cursor = conn.cursor(dictionary=True)
# List API keys
select_api_keys_query = "SELECT * FROM agentic_apis"
cursor.execute(select_api_keys_query)
result = cursor.fetchall()
# Extract the 'key' column from each row; the third key is used for Groq
keys = [item['key'] for item in result]
os.environ["GROQ_API_KEY"] = keys[2]
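# The cursor and connection are not used again below, so close them here
# rather than holding a MySQL connection open for the app's lifetime.
cursor.close()
conn.close()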
def extract_text_from_pdf(file):
    """Read a local PDF with PyMuPDF and return its full text."""
    text = ""
    with fitz.open("./" + file) as doc:
        for page in doc:
            text += page.get_text()
    return text
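# Quick sanity check (assumes one of the bundled PDFs sits next to this script):
#   print(extract_text_from_pdf("yoin.pdf")[:200])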
def summarize_pdf(file):
    pdf_text = extract_text_from_pdf(file)
    #system_message = "You are a world class researcher."
    # Japanese system prompt: "You are a world-class researcher."
    system_message = "貴方は世界的なレベルのリサーチャーです。"
    config_list = [{
        "model": "llama-3.3-70b-versatile",
        #"model": "llama-guard-3-8b",
        "api_key": os.environ.get("GROQ_API_KEY"),
        "api_type": "groq"
    }]
    # Researcher agent: generates the summary, with no human input
    researcher = autogen.ConversableAgent(
        "assistant",
        llm_config={"config_list": config_list},
        max_consecutive_auto_reply=1,
        system_message=system_message,
        human_input_mode="NEVER",
    )
    # User proxy: sends the prompt and stops when "TERMINATE" appears
    user_proxy = autogen.UserProxyAgent(
        "user_proxy",
        human_input_mode="NEVER",
        is_termination_msg=lambda x: "TERMINATE" in x.get("content", ""),
        max_consecutive_auto_reply=1,
        code_execution_config={"work_dir": "coding", "use_docker": False}
    )
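    # Both agents cap max_consecutive_auto_reply at 1, so the chat below is a
    # single prompt/response round trip rather than an open-ended dialogue.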
    # Compress the PDF text with LLMLingua so the prompt fits the model's
    # context window; target_token caps the compressed size
    llm_lingua = LLMLingua()
    #text_compressor = TextMessageCompressor(text_compressor=llm_lingua, compression_params={"target_token": 13000}, cache=None)
    text_compressor = TextMessageCompressor(text_compressor=llm_lingua, compression_params={"target_token": 5999}, cache=None)
    compressed_text = text_compressor.apply_transform([{"content": pdf_text}])
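    # A rough way to eyeball the compression (this assumes apply_transform
    # keeps the single-message shape passed in above):
    #   print(len(pdf_text), "->", len(compressed_text[0]["content"]), "chars")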
    #content_list = [item['content'] for item in compressed_text]
    # The two transforms below are only exercised by the commented-out demo that follows
    # Limit the message history to the 3 most recent messages
    max_msg_transform = transforms.MessageHistoryLimiter(max_messages=3)
    # Truncate each message to 3 tokens (applied only when the history exceeds 10 tokens in total)
    token_limit_transform = transforms.MessageTokenLimiter(max_tokens_per_message=3, min_tokens=10)
    '''messages = [
        {"role": "user", "content": "hello"},
        {"role": "assistant", "content": [{"type": "text", "text": "there"}]},
        {"role": "user", "content": "how"},
        {"role": "assistant", "content": [{"type": "text", "text": "are you doing?"}]},
        {"role": "user", "content": "very very very very very very long string"},
    ]
    processed_messages = max_msg_transform.apply_transform(copy.deepcopy(messages))
    pprint.pprint(processed_messages)
    processed_messages = token_limit_transform.apply_transform(copy.deepcopy(messages))
    pprint.pprint(processed_messages)'''
    # Attach history/token-limiting transforms to the researcher so long
    # conversations are trimmed before each LLM call
    context_handling = transform_messages.TransformMessages(
        transforms=[
            transforms.MessageHistoryLimiter(max_messages=10),
            transforms.MessageTokenLimiter(max_tokens=6000, max_tokens_per_message=2000, min_tokens=500),
            #transforms.MessageTokenLimiter(max_tokens=1000, max_tokens_per_message=50, min_tokens=500),
        ]
    )
    context_handling.add_to_agent(researcher)
    #context_handling = transform_messages.TransformMessages(transforms=[text_compressor])
    #context_handling.add_to_agent(researcher)
    #message = "Summarize this research paper for me in Japanese, include the important information" + pdf_text
    # Japanese prompt: "Summarize this document in Japanese, including the
    # important information. Also show the number of tokens saved."
    # Append the LLMLingua-compressed text (not the raw pdf_text) so the
    # prompt actually benefits from the compression above.
    message = "この資料を日本語で要約し、重要な情報を含めてください。節約されたトークン数も表示してください。" + compressed_text[0]["content"]
    result = user_proxy.initiate_chat(recipient=researcher, clear_history=True, message=message, silent=True)
    #print(text_compressor.get_logs([], []))
    # get_logs returns a tuple: (log message, whether the transform was applied)
    tresult = text_compressor.get_logs([], [])
    #print(tresult)
    # Extract the saved-token count from the log string
    #saved_tokens = str(int(tresult[0].split()[0]))
    #print(saved_tokens)
    return result.chat_history[1]["content"]
    #return result.chat_history[1]["content"] + "\n\n" + saved_tokens + " tokens were saved."
iface = gr.Interface(
    fn=summarize_pdf,
    #inputs=gr.inputs.File(label="Upload PDF"),
    inputs=gr.Dropdown(
        choices=["yoin.pdf", "spo_revenue.pdf", "lings.pdf", "korea-ai.pdf"],  # dropdown choices
        label="PDFを選択"  # label: "Select a PDF"
    ),
    outputs="text",
    title="Research Paper Summarizer",
    description="Select a PDF and get a summary in Japanese."
)
iface.launch()
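# Optional: Gradio's standard share=True flag would expose the demo through a
# temporary public link instead of localhost only:
#   iface.launch(share=True)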