File size: 6,357 Bytes
4543e5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98d82d2
4543e5a
0b436fd
 
4543e5a
 
 
 
98d82d2
4543e5a
98d82d2
4543e5a
98d82d2
 
 
 
 
 
 
4543e5a
 
 
98d82d2
4543e5a
98d82d2
 
0b436fd
98d82d2
 
 
0cf7f07
98d82d2
 
b9af798
98d82d2
 
 
4543e5a
98d82d2
 
b9af798
b66f579
98d82d2
4543e5a
98d82d2
0b436fd
98d82d2
 
b66f579
98d82d2
 
 
 
 
 
 
 
 
 
b66f579
98d82d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b66f579
98d82d2
b66f579
98d82d2
 
 
 
4543e5a
98d82d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b66f579
98d82d2
 
 
 
 
0b436fd
0508a5a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import os
import time
import shutil
import uuid
import gradio as gr
import requests
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from threading import Thread
from dotenv import load_dotenv

load_dotenv()

# === CONFIG ===
STORAGE_DIR = "storage"   # per-session FAISS indexes are saved under here
CLEANUP_INTERVAL = 600    # seconds between cleanup sweeps (10 min)
# NOTE(review): original comment said "30 min" but 1000 s is ~16.7 min —
# confirm the intended idle TTL (1800 for 30 min?). Value kept unchanged.
SESSION_TTL = 1000        # seconds a session folder may stay idle before deletion
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_MODEL = "z-ai/glm-4.5-air:free"

# Idempotent directory creation replaces the exists-check + makedirs pair.
os.makedirs(STORAGE_DIR, exist_ok=True)

# === CLEANUP THREAD ===
def cleanup_old_sessions():
    """Background reaper: delete session folders idle longer than SESSION_TTL.

    Runs forever, sleeping CLEANUP_INTERVAL seconds between sweeps. A failure
    on one folder (e.g. a concurrent delete or a file still held open) is
    skipped so a single bad entry cannot kill the reaper thread — the original
    let any OSError propagate and silently terminate the loop.
    """
    while True:
        now = time.time()
        for folder in os.listdir(STORAGE_DIR):
            path = os.path.join(STORAGE_DIR, folder)
            try:
                if os.path.isdir(path) and now - os.path.getmtime(path) > SESSION_TTL:
                    shutil.rmtree(path)
            except OSError:
                # Folder vanished or is busy — retry it on the next sweep.
                continue
        time.sleep(CLEANUP_INTERVAL)

# Daemon thread: dies with the process, so no explicit shutdown is needed.
Thread(target=cleanup_old_sessions, daemon=True).start()

# === PDF PROCESSING ===
def process_pdf(pdf_file):
    """Extract text from an uploaded PDF, chunk it, and persist a FAISS index.

    Args:
        pdf_file: Gradio file object (exposes a ``.name`` path) or None.

    Returns:
        Tuple of (status_message, session_id, chat_history); session_id is ""
        when no file was supplied or the PDF contains no extractable text.
    """
    if pdf_file is None:
        return "No file uploaded.", "", []

    session_id = str(uuid.uuid4())
    reader = PdfReader(pdf_file.name)

    # Extract each page once — the original called extract_text() twice per
    # page (once for the emptiness check, once for the join).
    parts = []
    for page in reader.pages:
        page_text = page.extract_text()
        if page_text:
            parts.append(page_text)
    text = "".join(parts)

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(text)
    if not chunks:
        # Scanned/image-only PDFs yield no text; FAISS.from_texts on an empty
        # list would raise instead of giving the user a useful message.
        return "No extractable text found in this PDF.", "", []

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    session_path = os.path.join(STORAGE_DIR, session_id)
    os.makedirs(session_path, exist_ok=True)

    db = FAISS.from_texts(chunks, embeddings)
    db.save_local(session_path)

    chat_history = [("System", "Paper uploaded and processed. You can now ask questions.")]
    return f"Paper uploaded successfully. Session ID: {session_id}", session_id, chat_history

# === QUERY FUNCTION ===
def query_paper(session_id, user_message, chat_history):
    """Answer a question about the uploaded paper via RAG + OpenRouter.

    Args:
        session_id: UUID string naming the session's FAISS index folder.
        user_message: The user's question.
        chat_history: List of (speaker, text) tuples, or None.

    Returns:
        Tuple of (updated_chat_history, "") — the empty string clears the
        Gradio input textbox.
    """
    # Work on a copy so the caller's list is never mutated in place.
    chat_history = list(chat_history or [])
    session_path = os.path.join(STORAGE_DIR, session_id) if session_id else ""

    if not session_id or not os.path.exists(session_path):
        chat_history.append(("System", "Session expired or not found. Upload the paper again."))
        return chat_history, ""

    if not user_message.strip():
        return chat_history, ""

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = FAISS.load_local(session_path, embeddings, allow_dangerous_deserialization=True)
    retriever = db.as_retriever(search_kwargs={"k": 3})

    # invoke() replaces the deprecated get_relevant_documents().
    docs = retriever.invoke(user_message)
    context = "\n\n".join(d.page_content for d in docs)

    prompt = f"""
    You are an AI assistant. Explain the following research paper content in simple terms and answer the question.
    Use your own knowledge also and make it more technical but simpler explanation should be like professor with 
    high knowledge but teaches in awesome way with more technical stuff but easier.

    Context from paper:
    {context}

    Question: {user_message}
    Answer:
    """

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": OPENROUTER_MODEL,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful research paper explainer. Explain all concepts clearly with technical aspects but in an easy way."
            },
            {"role": "user", "content": prompt}
        ]
    }

    try:
        # timeout added: the original call could hang the handler forever on a
        # stalled connection (requests has no default timeout).
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers, json=payload, timeout=120
        )

        if response.status_code == 200:
            answer = response.json()["choices"][0]["message"]["content"].strip()
        else:
            answer = f"Error: {response.status_code} - {response.text}"
    except Exception as e:
        answer = f"Error: {str(e)}"

    chat_history.append((user_message, answer))
    return chat_history, ""

# === GRADIO UI ===
# Wires upload / ask / clear controls to the functions above. Chat history is
# kept both in the Chatbot widget and in state_chat, and the two are mirrored
# into each other after every event so they stay consistent.
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Research Paper Chatbot (RAG + OpenRouter)")

    with gr.Row():
        pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
        session_box = gr.Textbox(label="Session ID", interactive=False)

    chatbot = gr.Chatbot(label="Chat about your paper", height=400)
    user_message = gr.Textbox(label="Ask a question", placeholder="What is this paper about?")

    with gr.Row():
        upload_btn = gr.Button("Upload Paper", variant="primary")
        ask_btn = gr.Button("Send Question")
        clear_btn = gr.Button("Clear Chat")

    # Store chat history and session id across interactions (per browser tab).
    state_chat = gr.State([])
    state_session = gr.State("")

    # Upload button functionality.
    # Thin wrapper so outputs map 1:1 onto (session_box, state_session, chatbot).
    def handle_upload(pdf_file):
        status, session_id, chat_history = process_pdf(pdf_file)
        return status, session_id, chat_history

    upload_btn.click(
        fn=handle_upload,
        inputs=[pdf_input],
        outputs=[session_box, state_session, chatbot]
    )

    # Ask button functionality.
    # Returns (new_chat, "") — the empty string clears the input textbox.
    def handle_question(session_id, message, chat_history):
        updated_chat, _ = query_paper(session_id, message, chat_history)
        return updated_chat, ""

    ask_btn.click(
        fn=handle_question,
        inputs=[state_session, user_message, chatbot],
        outputs=[chatbot, user_message]
    ).then(
        # After the answer renders, mirror the widget's chat into state_chat.
        lambda chat: chat,
        inputs=[chatbot],
        outputs=[state_chat]
    )

    # Submit on Enter — same pipeline as the Send button.
    user_message.submit(
        fn=handle_question,
        inputs=[state_session, user_message, chatbot],
        outputs=[chatbot, user_message]
    ).then(
        lambda chat: chat,
        inputs=[chatbot],
        outputs=[state_chat]
    )

    # Clear chat: empty both the visible widget and the stored history.
    def clear_chat():
        return [], []

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, state_chat]
    )

    # Sync chat state back into the chatbot whenever state_chat changes.
    state_chat.change(
        lambda chat: chat,
        inputs=[state_chat],
        outputs=[chatbot]
    )

# 0.0.0.0 makes the app reachable from outside the host/container.
demo.launch(debug=True, server_name="0.0.0.0",server_port=7860)