File size: 5,213 Bytes
c2fa49d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3019d6a
c2fa49d
 
3019d6a
 
c2fa49d
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import os
import fitz  # PyMuPDF
import gradio as gr
import autogen
from autogen.agentchat.contrib.capabilities import transform_messages, transforms
from autogen.agentchat.contrib.capabilities.text_compressors import LLMLingua
from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor
import mysql.connector

import copy
import pprint
import re
from typing import Dict, List, Tuple

# Connect to MySQL to load stored API keys.
# SECURITY NOTE(review): host/user/password are hard-coded in source;
# these should come from environment variables or a secrets store.
conn = mysql.connector.connect(
    host="www.ryhintl.com",
    user="smairuser",
    password="smairuser",
    port=36000,
    database="smair"
)

# dictionary=True makes each fetched row a dict keyed by column name.
cursor = conn.cursor(dictionary=True)

try:
    # Fetch every API-key row from the agentic_apis table.
    select_one_data_query = "SELECT * FROM agentic_apis"
    cursor.execute(select_one_data_query)
    result = cursor.fetchall()

    # Extract the 'key' column from each row.
    keys = [item['key'] for item in result]
finally:
    # Nothing below this point touches the database again, so release the
    # cursor and connection immediately (the original leaked both).
    cursor.close()
    conn.close()

# The third stored key is the Groq key (index assumed valid — TODO confirm
# the agentic_apis table always holds at least three rows).
os.environ["GROQ_API_KEY"] = keys[2]

def extract_text_from_pdf(file):
    """Extract the full plain text of a PDF in the working directory.

    Args:
        file: PDF filename relative to the current working directory.

    Returns:
        The concatenated text of all pages, in page order.
    """
    # os.path.join is more robust than the original manual "./" + file
    # concatenation, and a single "".join avoids quadratic string building
    # across pages.
    with fitz.open(os.path.join(".", file)) as doc:
        return "".join(page.get_text() for page in doc)

def summarize_pdf(file):
    """Summarize the given PDF in Japanese via a Groq-hosted LLM (autogen).

    Args:
        file: PDF filename, as selected in the Gradio dropdown.

    Returns:
        The assistant's summary text (second entry of the chat history).
    """
    pdf_text = extract_text_from_pdf(file)

    # System prompt (Japanese): "You are a world-class researcher."
    system_message = "貴方は世界的なレベルのリサーチャーです。"
    config_list = [{
        "model": "llama-3.3-70b-versatile",
        "api_key": os.environ.get("GROQ_API_KEY"),
        "api_type": "groq",
    }]

    researcher = autogen.ConversableAgent(
        "assistant",
        llm_config={"config_list": config_list},
        max_consecutive_auto_reply=1,
        system_message=system_message,
        human_input_mode="NEVER",
    )
    user_proxy = autogen.UserProxyAgent(
        "user_proxy",
        human_input_mode="NEVER",
        is_termination_msg=lambda x: "TERMINATE" in x.get("content", ""),
        max_consecutive_auto_reply=1,
        code_execution_config={"work_dir": "coding", "use_docker": False},
    )

    # NOTE(review): a LLMLingua/TextMessageCompressor pass previously ran
    # here, but its output was never sent to the model and its logs were
    # never read, so that (expensive) dead work has been removed.  If the
    # prompt's "tokens saved" request should be honored, re-introduce the
    # compressor and actually send its compressed text.

    # Keep the conversation within the model's context window: cap history
    # length and per-message / total token counts before each LLM call.
    context_handling = transform_messages.TransformMessages(
        transforms=[
            transforms.MessageHistoryLimiter(max_messages=10),
            transforms.MessageTokenLimiter(max_tokens=6000, max_tokens_per_message=2000, min_tokens=500),
        ]
    )
    context_handling.add_to_agent(researcher)

    # User prompt (Japanese): "Summarize this document in Japanese,
    # including the important information.  Also display the number of
    # tokens saved." — the PDF text is appended verbatim.
    message = "この資料を日本語で要約し、重要な情報を含めてください。節約されたトークン数も表示してください。" + pdf_text
    result = user_proxy.initiate_chat(recipient=researcher, clear_history=True, message=message, silent=True)

    # chat_history[0] is the user prompt; [1] is the assistant's reply.
    return result.chat_history[1]["content"]

# Sample PDFs the user may pick from the dropdown.
PDF_CHOICES = ["yoin.pdf", "spo_revenue.pdf", "lings.pdf", "korea-ai.pdf"]

# Dropdown input widget (label is Japanese for "Select a PDF").
pdf_selector = gr.Dropdown(
    choices=PDF_CHOICES,
    label="PDFを選択",
)

# Wire the summarizer into a simple single-input, text-output Gradio app.
iface = gr.Interface(
    fn=summarize_pdf,
    inputs=pdf_selector,
    outputs="text",
    title="Research Paper Summarizer",
    description="Select a PDF and get a summary in Japanese.",
)

iface.launch()