PraneshJs commited on
Commit
4543e5a
·
verified ·
1 Parent(s): 4d5c16a

added app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import shutil
4
+ import uuid
5
+ import gradio as gr
6
+ import requests
7
+ from PyPDF2 import PdfReader
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain_huggingface import HuggingFaceEmbeddings
11
+ from threading import Thread
12
+ from dotenv import load_dotenv
13
+
14
+
15
# Load OPENROUTER_API_KEY (and any other settings) from a local .env file.
load_dotenv()

# === CONFIG ===
STORAGE_DIR = "storage"      # per-session FAISS indexes live under this folder
CLEANUP_INTERVAL = 600       # seconds between cleanup sweeps (10 min)
SESSION_TTL = 1800           # seconds a session may stay idle before deletion (30 min)
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
OPENROUTER_MODEL = "z-ai/glm-4.5-air:free"

# exist_ok avoids the explicit existence check (and its TOCTOU race).
os.makedirs(STORAGE_DIR, exist_ok=True)
26
+
27
# === CLEANUP THREAD ===
def _sweep_expired_sessions(now):
    """Delete every session folder that has been idle longer than SESSION_TTL."""
    for folder in os.listdir(STORAGE_DIR):
        path = os.path.join(STORAGE_DIR, folder)
        try:
            if os.path.isdir(path) and now - os.path.getmtime(path) > SESSION_TTL:
                shutil.rmtree(path)
        except OSError:
            # Folder vanished or changed mid-sweep (concurrent request or
            # delete) — skip this entry instead of killing the whole thread.
            continue


def cleanup_old_sessions():
    """Background loop: periodically purge expired session folders.

    Runs forever in a daemon thread. Per-folder filesystem errors are
    swallowed inside the sweep so one bad entry cannot terminate cleanup.
    """
    while True:
        _sweep_expired_sessions(time.time())
        time.sleep(CLEANUP_INTERVAL)


Thread(target=cleanup_old_sessions, daemon=True).start()
38
+
39
# === PDF PROCESSING ===
def process_pdf(pdf_file):
    """Index an uploaded PDF into a fresh per-session FAISS store.

    Args:
        pdf_file: Gradio file object (has a ``.name`` path) or None.

    Returns:
        (status_message, session_id, chat_history) matching the Gradio
        outputs; on failure session_id is "" and chat_history is empty.
    """
    if pdf_file is None:
        return "No file uploaded.", "", []

    reader = PdfReader(pdf_file.name)
    # Extract each page exactly once (extract_text() is expensive; the
    # original called it twice per page) and drop pages with no text.
    page_texts = (page.extract_text() for page in reader.pages)
    text = "".join(t for t in page_texts if t)
    if not text.strip():
        # Scanned/image-only PDFs yield no extractable text; fail early
        # instead of building an index over an empty document.
        return "Could not extract any text from this PDF.", "", []

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text(text)

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    session_id = str(uuid.uuid4())
    session_path = os.path.join(STORAGE_DIR, session_id)
    os.makedirs(session_path, exist_ok=True)

    db = FAISS.from_texts(chunks, embeddings)
    db.save_local(session_path)

    chat_history = [("System", "Paper uploaded and processed. You can now ask questions.")]
    return f"Paper uploaded successfully. Session ID: {session_id}", session_id, chat_history
59
+
60
# === QUERY FUNCTION ===
def query_paper(session_id, user_message, chat_history):
    """Answer a question about the uploaded paper using RAG over its FAISS index.

    Args:
        session_id: Session folder name under STORAGE_DIR (from process_pdf).
        user_message: The user's question; blank input is ignored.
        chat_history: Existing (user, assistant) pairs, or None.

    Returns:
        (updated chat history, "") — the empty string clears the input box.
    """
    chat_history = chat_history or []

    # Short-circuit on a falsy session id before touching the filesystem.
    session_path = os.path.join(STORAGE_DIR, session_id) if session_id else ""
    if not session_id or not os.path.exists(session_path):
        chat_history.append(("System", "Session expired or not found. Upload the paper again."))
        return chat_history, ""

    if not user_message.strip():
        return chat_history, ""

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    # allow_dangerous_deserialization is safe here: we only load indexes this
    # app itself wrote into STORAGE_DIR.
    db = FAISS.load_local(session_path, embeddings, allow_dangerous_deserialization=True)
    retriever = db.as_retriever(search_kwargs={"k": 3})

    # Use invoke() method
    docs = retriever.invoke(user_message)
    context = "\n\n".join(d.page_content for d in docs)

    prompt = f"""
You are an AI assistant. Explain the following research paper content in simple terms and answer the question.
Context from paper:
{context}

Question: {user_message}
Answer:
"""

    headers = {"Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": OPENROUTER_MODEL,
        "messages": [
            {"role": "system", "content": "You are a helpful research explainer."},
            {"role": "user", "content": prompt},
        ],
    }

    try:
        # timeout prevents a hung OpenRouter request from blocking this
        # handler (and the UI) forever.
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers, json=payload, timeout=120,
        )
        if response.status_code == 200:
            answer = response.json()["choices"][0]["message"]["content"].strip()
        else:
            answer = f"Error: {response.status_code} - {response.text}"
    except Exception as e:
        answer = f"Error: {str(e)}"

    # Update chat history
    chat_history.append((user_message, answer))
    return chat_history, ""
113
+
114
# === GRADIO UI ===
with gr.Blocks() as demo:
    gr.Markdown("## 📄 Research Paper Chatbot (RAG + OpenRouter)")

    with gr.Row():
        pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
        session_box = gr.Textbox(label="Session ID", interactive=False)

    chatbot = gr.Chatbot(label="Chat about your paper", height=400)
    user_message = gr.Textbox(label="Ask a question", placeholder="What is this paper about?")

    with gr.Row():
        upload_btn = gr.Button("Upload Paper", variant="primary")
        ask_btn = gr.Button("Send Question")
        clear_btn = gr.Button("Clear Chat")

    # Per-browser-tab state: the conversation history and its session id.
    state_chat = gr.State([])
    state_session = gr.State("")

    def _mirror(chat):
        """Identity passthrough used to keep state and display in sync."""
        return chat

    def _reset():
        """Empty both the visible chat and the stored history."""
        return [], []

    # Upload: index the PDF and seed the conversation. process_pdf already
    # returns exactly (status, session_id, chat_history).
    upload_btn.click(
        fn=process_pdf,
        inputs=[pdf_input],
        outputs=[session_box, state_session, state_chat],
    )

    # Ask via button: answer, clear the textbox, then persist the new history.
    # query_paper already returns (updated_chat, "").
    ask_btn.click(
        fn=query_paper,
        inputs=[state_session, user_message, state_chat],
        outputs=[chatbot, user_message],
    ).then(
        _mirror,
        inputs=[chatbot],
        outputs=[state_chat],
    )

    # Ask via Enter key: identical wiring to the button.
    user_message.submit(
        fn=query_paper,
        inputs=[state_session, user_message, state_chat],
        outputs=[chatbot, user_message],
    ).then(
        _mirror,
        inputs=[chatbot],
        outputs=[state_chat],
    )

    # Clear chat.
    clear_btn.click(
        fn=_reset,
        outputs=[chatbot, state_chat],
    )

    # Mirror programmatic history changes (e.g. after upload) into the display.
    state_chat.change(
        _mirror,
        inputs=[state_chat],
        outputs=[chatbot],
    )

demo.launch(debug=True)