"""Gradio app: summarize a user-selected PDF in Japanese via an AutoGen agent pair.

Startup side effects: fetches API keys from a MySQL table, exports one as
GROQ_API_KEY, then launches the Gradio interface.
"""

import os
import copy
import pprint
import re
from typing import Dict, List, Tuple

import fitz  # PyMuPDF
import gradio as gr
import autogen
import mysql.connector
from autogen.agentchat.contrib.capabilities import transform_messages, transforms
from autogen.agentchat.contrib.capabilities.text_compressors import LLMLingua
from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor

# SECURITY NOTE(review): database credentials are hard-coded in source; move
# them to environment variables or a secrets manager before publishing.
conn = mysql.connector.connect(
    host="www.ryhintl.com",
    user="smairuser",
    password="smairuser",
    port=36000,
    database="smair",
)

# Fetch the stored API keys once at startup. The connection is not used again,
# so release the cursor and connection promptly (the original leaked both).
cursor = conn.cursor(dictionary=True)
try:
    cursor.execute("SELECT * FROM agentic_apis")
    result = cursor.fetchall()
finally:
    cursor.close()
    conn.close()

# Extract the 'key' column from each row.
# NOTE(review): magic index 2 — presumably the Groq key's row position is
# stable in agentic_apis; confirm against the table contents.
keys = [item["key"] for item in result]
os.environ["GROQ_API_KEY"] = keys[2]


def extract_text_from_pdf(file):
    """Return the concatenated text of every page of ``./<file>``.

    Parameters
    ----------
    file : str
        File name relative to the current working directory.
    """
    text = ""
    with fitz.open("./" + file) as doc:
        for page in doc:
            text += page.get_text()
    return text


def summarize_pdf(file):
    """Summarize the selected PDF in Japanese using a two-agent AutoGen chat.

    Parameters
    ----------
    file : str
        PDF file name chosen in the UI dropdown.

    Returns
    -------
    str
        The researcher agent's reply (second entry of the chat history).
    """
    pdf_text = extract_text_from_pdf(file)

    system_message = "貴方は世界的なレベルのリサーチャーです。"
    config_list = [{
        "model": "llama-3.3-70b-versatile",
        "api_key": os.environ.get("GROQ_API_KEY"),
        "api_type": "groq",
    }]

    researcher = autogen.ConversableAgent(
        "assistant",
        llm_config={"config_list": config_list},
        max_consecutive_auto_reply=1,
        system_message=system_message,
        human_input_mode="NEVER",
    )
    user_proxy = autogen.UserProxyAgent(
        "user_proxy",
        human_input_mode="NEVER",
        # Guard against content being present but None — the original
        # `"TERMINATE" in x.get("content", "")` raised TypeError in that case.
        is_termination_msg=lambda x: "TERMINATE" in (x.get("content") or ""),
        max_consecutive_auto_reply=1,
        code_execution_config={"work_dir": "coding", "use_docker": False},
    )

    # NOTE: the original also ran an LLMLingua TextMessageCompressor and two
    # standalone history/token transforms here, but every result was discarded
    # (compressed_text, tresult, max_msg_transfrom, token_limit_transform were
    # all unused), so that expensive dead work has been removed.

    # Keep the prompt within the model's context window: cap history length
    # and per-message token counts on the researcher agent before each send.
    context_handling = transform_messages.TransformMessages(
        transforms=[
            transforms.MessageHistoryLimiter(max_messages=10),
            transforms.MessageTokenLimiter(
                max_tokens=6000, max_tokens_per_message=2000, min_tokens=500
            ),
        ]
    )
    context_handling.add_to_agent(researcher)

    message = "この資料を日本語で要約し、重要な情報を含めてください。節約されたトークン数も表示してください。" + pdf_text
    result = user_proxy.initiate_chat(
        recipient=researcher, clear_history=True, message=message, silent=True
    )

    # chat_history[0] is the outgoing prompt; [1] is the researcher's single
    # reply (max_consecutive_auto_reply=1).
    return result.chat_history[1]["content"]


iface = gr.Interface(
    fn=summarize_pdf,
    inputs=gr.Dropdown(
        choices=["yoin.pdf", "spo_revenue.pdf", "lings.pdf", "korea-ai.pdf"],
        label="PDFを選択",
    ),
    outputs="text",
    title="Research Paper Summarizer",
    description="Select a PDF and get a summary in Japanese.",
)

iface.launch()