PraneshJs committed on
Commit
98d82d2
·
verified ·
1 Parent(s): d7e845d

Fixed issue

Browse files
Files changed (1) hide show
  1. app.py +159 -154
app.py CHANGED
@@ -13,8 +13,7 @@ from dotenv import load_dotenv
13
 
14
  load_dotenv()
15
 
16
- #=== CONFIG ===
17
-
18
  STORAGE_DIR = "storage"
19
  CLEANUP_INTERVAL = 600 # 10 min
20
  SESSION_TTL = 1000 # 30 min
@@ -22,171 +21,177 @@ OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
22
  OPENROUTER_MODEL = "z-ai/glm-4.5-air:free"
23
 
24
  if not os.path.exists(STORAGE_DIR):
25
- os.makedirs(STORAGE_DIR)
26
-
27
- #=== CLEANUP THREAD ===
28
 
 
29
  def cleanup_old_sessions():
30
- while True:
31
- now = time.time()
32
- for folder in os.listdir(STORAGE_DIR):
33
- path = os.path.join(STORAGE_DIR, folder)
34
- if os.path.isdir(path) and now - os.path.getmtime(path) > SESSION_TTL:
35
- shutil.rmtree(path)
36
- time.sleep(CLEANUP_INTERVAL)
37
 
38
  Thread(target=cleanup_old_sessions, daemon=True).start()
39
 
40
- #=== PDF PROCESSING ===
41
-
42
  def process_pdf(pdf_file):
43
- if pdf_file is None:
44
- return "No file uploaded.", "", []
45
- session_id = str(uuid.uuid4())
46
- reader = PdfReader(pdf_file.name)
47
- text = "".join([page.extract_text() for page in reader.pages if page.extract_text()])
48
 
49
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
50
- chunks = splitter.split_text(text)
 
51
 
52
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
53
- session_path = os.path.join(STORAGE_DIR, session_id)
54
- os.makedirs(session_path, exist_ok=True)
55
 
56
- db = FAISS.from_texts(chunks, embeddings)
57
- db.save_local(session_path)
 
58
 
59
- chat_history = [("System", "Paper uploaded and processed. You can now ask questions.")]
60
- return f"Paper uploaded successfully. Session ID: {session_id}", session_id, chat_history
61
 
62
- #=== QUERY FUNCTION ===
 
63
 
 
64
  def query_paper(session_id, user_message, chat_history):
65
- if not session_id or not os.path.exists(os.path.join(STORAGE_DIR, session_id)):
66
- chat_history = chat_history or []
67
- chat_history.append(("System", "Session expired or not found. Upload the paper again."))
68
- return chat_history, ""
69
-
70
- if not user_message.strip():
71
- return chat_history, ""
72
-
73
- session_path = os.path.join(STORAGE_DIR, session_id)
74
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
75
- db = FAISS.load_local(session_path, embeddings, allow_dangerous_deserialization=True)
76
- retriever = db.as_retriever(search_kwargs={"k": 3})
77
-
78
- # Use invoke() method
79
- docs = retriever.invoke(user_message)
80
- context = "\n\n".join([d.page_content for d in docs])
81
-
82
- prompt = f"""
83
-
84
- You are an AI assistant. Explain the following research paper content in simple terms and answer the question. Use your own knowledge also and make it more technical but simpler explanation should be like professor with high knowledge but teaches in awesome way with more technical stuff but easier
85
- Context from paper:
86
- {context}
87
-
88
- Question: {user_message}
89
- Answer:
90
- """
91
-
92
- headers = {"Authorization": f"Bearer {OPENROUTER_API_KEY}", "Content-Type": "application/json"}
93
- payload = {
94
- "model": OPENROUTER_MODEL,
95
- "messages": [
96
- {"role": "system", "content": "You are a helpful research paper explainer.Explain all concepts clearly with technical aspects but in a easy way that user can understand easily and gains more knowledge don't be greedy and use more tokens if question is more or it's about the research paper"},
97
- {"role": "user", "content": prompt}
98
- ]
99
- }
100
-
101
- try:
102
- response = requests.post("https://openrouter.ai/api/v1/chat/completions",
103
- headers=headers, json=payload)
104
-
105
- if response.status_code == 200:
106
- answer = response.json()["choices"][0]["message"]["content"].strip()
107
- else:
108
- answer = f"Error: {response.status_code} - {response.text}"
109
- except Exception as e:
110
- answer = f"Error: {str(e)}"
111
-
112
- # Update chat history
113
- chat_history = chat_history or []
114
- chat_history.append((user_message, answer))
115
-
116
- return chat_history, ""
117
-
118
- #=== GRADIO UI ===
119
-
 
 
 
 
 
 
 
 
120
  with gr.Blocks() as demo:
121
- gr.Markdown("## 📄 Research Paper Chatbot (RAG + OpenRouter)")
122
-
123
- with gr.Row():
124
- pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
125
- session_box = gr.Textbox(label="Session ID", interactive=False)
126
-
127
- chatbot = gr.Chatbot(label="Chat about your paper", height=400)
128
- user_message = gr.Textbox(label="Ask a question", placeholder="What is this paper about?")
129
-
130
- with gr.Row():
131
- upload_btn = gr.Button("Upload Paper", variant="primary")
132
- ask_btn = gr.Button("Send Question")
133
- clear_btn = gr.Button("Clear Chat")
134
-
135
- # Store chat history and session
136
- state_chat = gr.State([])
137
- state_session = gr.State("")
138
-
139
- # Upload button functionality
140
- def handle_upload(pdf_file):
141
- status, session_id, chat_history = process_pdf(pdf_file)
142
- return status, session_id, chat_history
143
-
144
- upload_btn.click(
145
- fn=handle_upload,
146
- inputs=[pdf_input],
147
- outputs=[session_box, state_session, state_chat]
148
- )
149
-
150
- # Ask button functionality
151
- def handle_question(session_id, message, chat_history):
152
- updated_chat, _ = query_paper(session_id, message, chat_history)
153
- return updated_chat, ""
154
-
155
- ask_btn.click(
156
- fn=handle_question,
157
- inputs=[state_session, user_message, state_chat],
158
- outputs=[chatbot, user_message]
159
- ).then(
160
- lambda chat: chat,
161
- inputs=[chatbot],
162
- outputs=[state_chat]
163
- )
164
-
165
- # Submit on enter
166
- user_message.submit(
167
- fn=handle_question,
168
- inputs=[state_session, user_message, state_chat],
169
- outputs=[chatbot, user_message]
170
- ).then(
171
- lambda chat: chat,
172
- inputs=[chatbot],
173
- outputs=[state_chat]
174
- )
175
-
176
- # Clear chat
177
- def clear_chat():
178
- return [], []
179
-
180
- clear_btn.click(
181
- fn=clear_chat,
182
- outputs=[chatbot, state_chat]
183
- )
184
-
185
- # Update chatbot display when chat history changes
186
- state_chat.change(
187
- lambda chat: chat,
188
- inputs=[state_chat],
189
- outputs=[chatbot]
190
- )
191
 
192
  demo.launch(debug=True)
 
13
 
14
  load_dotenv()
15
 
16
+ # === CONFIG ===
 
17
  STORAGE_DIR = "storage"
18
  CLEANUP_INTERVAL = 600 # 10 min
19
  SESSION_TTL = 1000 # 30 min
 
21
  OPENROUTER_MODEL = "z-ai/glm-4.5-air:free"
22
 
23
  if not os.path.exists(STORAGE_DIR):
24
+ os.makedirs(STORAGE_DIR)
 
 
25
 
26
+ # === CLEANUP THREAD ===
27
  def cleanup_old_sessions():
28
+ while True:
29
+ now = time.time()
30
+ for folder in os.listdir(STORAGE_DIR):
31
+ path = os.path.join(STORAGE_DIR, folder)
32
+ if os.path.isdir(path) and now - os.path.getmtime(path) > SESSION_TTL:
33
+ shutil.rmtree(path)
34
+ time.sleep(CLEANUP_INTERVAL)
35
 
36
  Thread(target=cleanup_old_sessions, daemon=True).start()
37
 
38
+ # === PDF PROCESSING ===
 
39
  def process_pdf(pdf_file):
40
+ if pdf_file is None:
41
+ return "No file uploaded.", "", []
 
 
 
42
 
43
+ session_id = str(uuid.uuid4())
44
+ reader = PdfReader(pdf_file.name)
45
+ text = "".join([page.extract_text() for page in reader.pages if page.extract_text()])
46
 
47
+ splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
48
+ chunks = splitter.split_text(text)
 
49
 
50
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
51
+ session_path = os.path.join(STORAGE_DIR, session_id)
52
+ os.makedirs(session_path, exist_ok=True)
53
 
54
+ db = FAISS.from_texts(chunks, embeddings)
55
+ db.save_local(session_path)
56
 
57
+ chat_history = [{"role": "system", "content": "Paper uploaded and processed. You can now ask questions."}]
58
+ return f"Paper uploaded successfully. Session ID: {session_id}", session_id, chat_history
59
 
60
+ # === QUERY FUNCTION ===
61
  def query_paper(session_id, user_message, chat_history):
62
+ if not session_id or not os.path.exists(os.path.join(STORAGE_DIR, session_id)):
63
+ chat_history = chat_history or []
64
+ chat_history.append({"role": "system", "content": "Session expired or not found. Upload the paper again."})
65
+ return chat_history, ""
66
+
67
+ if not user_message.strip():
68
+ return chat_history, ""
69
+
70
+ session_path = os.path.join(STORAGE_DIR, session_id)
71
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
72
+ db = FAISS.load_local(session_path, embeddings, allow_dangerous_deserialization=True)
73
+ retriever = db.as_retriever(search_kwargs={"k": 3})
74
+
75
+ # Use invoke() method instead of deprecated get_relevant_documents
76
+ docs = retriever.invoke(user_message)
77
+ context = "\n\n".join([d.page_content for d in docs])
78
+
79
+ prompt = f"""
80
+ You are an AI assistant. Explain the following research paper content in simple terms and answer the question.
81
+ Use your own knowledge also and make it more technical but simpler explanation should be like professor with
82
+ high knowledge but teaches in awesome way with more technical stuff but easier.
83
+
84
+ Context from paper:
85
+ {context}
86
+
87
+ Question: {user_message}
88
+ Answer:
89
+ """
90
+
91
+ headers = {
92
+ "Authorization": f"Bearer {OPENROUTER_API_KEY}",
93
+ "Content-Type": "application/json"
94
+ }
95
+ payload = {
96
+ "model": OPENROUTER_MODEL,
97
+ "messages": [
98
+ {
99
+ "role": "system",
100
+ "content": "You are a helpful research paper explainer. Explain all concepts clearly with technical aspects but in an easy way."
101
+ },
102
+ {"role": "user", "content": prompt}
103
+ ]
104
+ }
105
+
106
+ try:
107
+ response = requests.post("https://openrouter.ai/api/v1/chat/completions",
108
+ headers=headers, json=payload)
109
+
110
+ if response.status_code == 200:
111
+ answer = response.json()["choices"][0]["message"]["content"].strip()
112
+ else:
113
+ answer = f"Error: {response.status_code} - {response.text}"
114
+ except Exception as e:
115
+ answer = f"Error: {str(e)}"
116
+
117
+ # Update chat history (Gradio expects list of dicts with role + content)
118
+ chat_history = chat_history or []
119
+ chat_history.append({"role": "user", "content": user_message})
120
+ chat_history.append({"role": "assistant", "content": answer})
121
+
122
+ return chat_history, ""
123
+
124
+ # === GRADIO UI ===
125
  with gr.Blocks() as demo:
126
+ gr.Markdown("## 📄 Research Paper Chatbot (RAG + OpenRouter)")
127
+
128
+ with gr.Row():
129
+ pdf_input = gr.File(label="Upload Research Paper (PDF)", file_types=[".pdf"])
130
+ session_box = gr.Textbox(label="Session ID", interactive=False)
131
+
132
+ chatbot = gr.Chatbot(label="Chat about your paper", height=400)
133
+ user_message = gr.Textbox(label="Ask a question", placeholder="What is this paper about?")
134
+
135
+ with gr.Row():
136
+ upload_btn = gr.Button("Upload Paper", variant="primary")
137
+ ask_btn = gr.Button("Send Question")
138
+ clear_btn = gr.Button("Clear Chat")
139
+
140
+ # Store chat history and session
141
+ state_chat = gr.State([])
142
+ state_session = gr.State("")
143
+
144
+ # Upload button functionality
145
+ def handle_upload(pdf_file):
146
+ status, session_id, chat_history = process_pdf(pdf_file)
147
+ return status, session_id, chat_history
148
+
149
+ upload_btn.click(
150
+ fn=handle_upload,
151
+ inputs=[pdf_input],
152
+ outputs=[session_box, state_session, chatbot]
153
+ )
154
+
155
+ # Ask button functionality
156
+ def handle_question(session_id, message, chat_history):
157
+ updated_chat, _ = query_paper(session_id, message, chat_history)
158
+ return updated_chat, ""
159
+
160
+ ask_btn.click(
161
+ fn=handle_question,
162
+ inputs=[state_session, user_message, chatbot],
163
+ outputs=[chatbot, user_message]
164
+ ).then(
165
+ lambda chat: chat,
166
+ inputs=[chatbot],
167
+ outputs=[state_chat]
168
+ )
169
+
170
+ # Submit on enter
171
+ user_message.submit(
172
+ fn=handle_question,
173
+ inputs=[state_session, user_message, chatbot],
174
+ outputs=[chatbot, user_message]
175
+ ).then(
176
+ lambda chat: chat,
177
+ inputs=[chatbot],
178
+ outputs=[state_chat]
179
+ )
180
+
181
+ # Clear chat
182
+ def clear_chat():
183
+ return [], []
184
+
185
+ clear_btn.click(
186
+ fn=clear_chat,
187
+ outputs=[chatbot, state_chat]
188
+ )
189
+
190
+ # Update chatbot display when chat history changes
191
+ state_chat.change(
192
+ lambda chat: chat,
193
+ inputs=[state_chat],
194
+ outputs=[chatbot]
195
+ )
196
 
197
  demo.launch(debug=True)