Mohit0708 commited on
Commit
ae05a75
·
verified ·
1 Parent(s): 56dc425

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +303 -0
app.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ==========================================
2
+ # 1. INITIAL SETUP & LIBRARIES
3
+ # ==========================================
4
+ import os
5
+ import json
6
+ import uuid
7
+ import base64
8
+ import whisper
9
+ import pymupdf4llm
10
+ import gradio as gr
11
+ from datetime import datetime
12
+ from huggingface_hub import InferenceClient
13
+ from langchain_text_splitters import MarkdownTextSplitter
14
+ from langchain_huggingface import HuggingFaceEmbeddings
15
+ from langchain_community.vectorstores import Chroma
16
+
17
+ # ==========================================
18
+ # 2. CONNECT TO AI APIS (Replaces Local Models)
19
+ # ==========================================
20
+ print("⏳ Connecting to Hugging Face APIs...")
21
+
22
+ # Get token from environment variable (Set this in HF Spaces Secrets)
23
+ HF_TOKEN = os.environ.get("HF_TOKEN")
24
+
25
+ if not HF_TOKEN:
26
+ print("⚠️ WARNING: HF_TOKEN not found! The AI will not be able to generate responses.")
27
+
28
+ # --- A. Mistral-7B API (The Writer/Scientist) ---
29
+ text_client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
30
+
31
+ # --- B. Qwen2-VL API (The Eye) ---
32
+ # We use the 7B version since the cloud API handles the compute!
33
+ vision_client = InferenceClient("Qwen/Qwen2-VL-7B-Instruct", token=HF_TOKEN)
34
+
35
+ # --- C. Local Embeddings & Whisper (Runs fine on CPU) ---
36
+ embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
37
+ whisper_model = whisper.load_model("base")
38
+
39
+ print("✅ APIs and Local Models Loaded Successfully!")
40
+
41
+ # ==========================================
42
+ # 3. GLOBAL STATE & HELPERS
43
+ # ==========================================
44
+ main_paper_retriever = None
45
+ brainstorm_retriever = None
46
+ main_extracted_images = []
47
+ chat_history_file = "research_lab_history.json"
48
+
49
+ if not os.path.exists(chat_history_file):
50
+ with open(chat_history_file, "w") as f: json.dump([], f)
51
+
52
def save_to_json(user_msg, combined_ans, mode):
    """Append one Q/A exchange to the JSON history log.

    Args:
        user_msg: The raw user query.
        combined_ans: The assistant's full (mode-prefixed) answer.
        mode: The research-stage label the answer was produced under.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = {"timestamp": timestamp, "mode": mode, "user": user_msg, "assistant": combined_ans}
    try:
        with open(chat_history_file, "r") as f:
            history = json.load(f)
    # Start fresh only when the log is missing or corrupted. The original used
    # a bare `except:`, which also swallowed KeyboardInterrupt and real bugs.
    except (FileNotFoundError, json.JSONDecodeError):
        history = []
    if not isinstance(history, list):
        # Guard against a hand-edited, non-list log file.
        history = []
    history.append(entry)
    with open(chat_history_file, "w") as f:
        json.dump(history, f, indent=4)
60
+
61
def process_pdf_to_markdown(pdf_path, extract_images=True):
    """Convert a PDF to Markdown text; optionally extract its images.

    Extracted image paths are published via the module-level
    ``main_extracted_images`` list (sorted for a stable order).
    Returns "" on any conversion failure — deliberate best-effort,
    callers treat an empty string as "no text extracted".
    """
    global main_extracted_images
    output_image_dir = "extracted_images"

    if extract_images:
        # Start from an empty directory so stale images from a previous
        # upload are never attributed to the new paper.
        os.makedirs(output_image_dir, exist_ok=True)
        for fname in os.listdir(output_image_dir):
            os.remove(os.path.join(output_image_dir, fname))

    try:
        if extract_images:
            md_text = pymupdf4llm.to_markdown(pdf_path, write_images=True, image_path=output_image_dir, image_format="png")
            # Also accept .jpeg and uppercase extensions (the original
            # filter only matched lowercase '.png'/'.jpg').
            main_extracted_images = sorted(
                os.path.join(output_image_dir, fname)
                for fname in os.listdir(output_image_dir)
                if fname.lower().endswith(('.png', '.jpg', '.jpeg'))
            )
        else:
            md_text = pymupdf4llm.to_markdown(pdf_path, write_images=False)
        return md_text
    except Exception:
        return ""
82
+
83
# --- UPLOAD HANDLER 1: MAIN PAPER ---
def process_main_paper(file_obj):
    """Index the target paper: text into Chroma, figures into main_extracted_images."""
    global main_paper_retriever
    main_paper_retriever = None
    if file_obj is None:
        return "⚠️ No file uploaded."

    try:
        # Convert to Markdown, pulling out figures for the later vision pass.
        markdown = process_pdf_to_markdown(file_obj.name, extract_images=True)
        # Chunk and embed under a unique collection so re-uploads never collide.
        docs = MarkdownTextSplitter(chunk_size=1000, chunk_overlap=200).create_documents([markdown])
        collection = f"main_{uuid.uuid4().hex[:8]}"
        store = Chroma.from_documents(documents=docs, embedding=embedding_model, collection_name=collection)
        main_paper_retriever = store.as_retriever(search_kwargs={"k": 3})
        return f"✅ Main Paper Ready!\n📘 Text: Indexed\n👁️ Images: {len(main_extracted_images)} Extracted"
    except Exception as e:
        return f"❌ Error: {str(e)}"
99
+
100
# --- UPLOAD HANDLER 2: REFERENCE SET ---
def process_brainstorm_papers(file_list):
    """Merge up to 3 reference PDFs into one Markdown corpus and index it."""
    global brainstorm_retriever
    brainstorm_retriever = None
    if not file_list:
        return "⚠️ No files uploaded."
    if len(file_list) > 3:
        return "⚠️ Limit exceeded: Max 3 PDFs."

    try:
        names = []
        sections = []
        for file_obj in file_list:
            base = os.path.basename(file_obj.name)
            names.append(base)
            body = process_pdf_to_markdown(file_obj.name, extract_images=False)
            # Tag each paper so retrieved chunks stay attributable to a source.
            sections.append(f"\n\n--- PAPER: {base} ---\n{body}\n")
        combined_md = "".join(sections)

        # Bigger chunks/overlap than the main paper: cross-paper context matters here.
        docs = MarkdownTextSplitter(chunk_size=1500, chunk_overlap=300).create_documents([combined_md])
        collection = f"brainstorm_{uuid.uuid4().hex[:8]}"
        store = Chroma.from_documents(documents=docs, embedding=embedding_model, collection_name=collection)
        brainstorm_retriever = store.as_retriever(search_kwargs={"k": 5})
        return f"✅ Knowledge Base Ready!\n📚 Papers: {', '.join(names)}"
    except Exception as e:
        return f"❌ Error: {str(e)}"
123
+
124
def transcribe_audio(audio_path):
    """Return Whisper's transcript of the recording, or "" when nothing was recorded."""
    if audio_path is None:
        return ""
    result = whisper_model.transcribe(audio_path)
    return result["text"].strip()
127
+
128
+ # ==========================================
129
+ # 4. INTELLIGENCE LAYERS (API WRAPPERS)
130
+ # ==========================================
131
+
132
# Helper function to call Mistral API
def ask_mistral(prompt):
    """Send *prompt* to the Mistral text-generation endpoint.

    Returns the generated text, or a human-readable warning string when the
    API call fails (the UI simply displays whatever comes back).
    """
    try:
        return text_client.text_generation(prompt, max_new_tokens=1000, temperature=0.3)
    except Exception as e:
        return f"⚠️ API Error (Mistral): {str(e)}"
139
+
140
# Helper function to call Qwen API
def ask_qwen(prompt, image_paths):
    """Ask the Qwen2-VL endpoint about a set of images.

    Each image is inlined as a base64 data URL; the text prompt is appended
    after the images. Returns the model's reply, or a warning string on failure.
    """
    try:
        content = []
        for img_path in image_paths:
            with open(img_path, "rb") as image_file:
                raw = image_file.read()
            b64_img = base64.b64encode(raw).decode('utf-8')
            content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{b64_img}"}
            })
        content.append({"type": "text", "text": prompt})

        response = vision_client.chat_completion(
            messages=[{"role": "user", "content": content}],
            max_tokens=150,
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"⚠️ API Error (Qwen - Server might be busy): {str(e)}"
157
+
158
# MODE 1: CHAT WITH MAIN PAPER
def get_main_paper_response(question):
    """Answer *question* from the indexed main paper plus its extracted figures.

    Fix vs. original: the retriever check now comes FIRST. The original ran
    the Qwen vision pass (a paid/slow API call) even when no paper was
    indexed, only to return the "upload" warning afterwards.
    """
    global main_paper_retriever, main_extracted_images

    if main_paper_retriever is None:
        return "⚠️ Please upload Main Paper."

    # Vision pass: relate up to 3 extracted figures to the question.
    vision_context = ""
    if main_extracted_images:
        vision_prompt = f"Relate these images to: {question}"
        vision_context = ask_qwen(vision_prompt, main_extracted_images[:3])

    # Text pass: RAG over the Markdown chunks, fused with the visual insight.
    docs = main_paper_retriever.invoke(question)
    text_context = "\n\n".join(d.page_content for d in docs)
    prompt = f"""[INST] Use the context to answer. Integrate visual insights if available.
Markdown Context: {text_context}
Visual Insight: {vision_context}
Question: {question} [/INST]"""
    return ask_mistral(prompt)
179
+
180
# MODE 2: BRAINSTORM NOVELTY
def get_novelty_response(question):
    """Mine the reference-paper index for limitations and novel directions."""
    global brainstorm_retriever
    if not brainstorm_retriever:
        return "⚠️ Upload Reference Papers."

    retrieved = brainstorm_retriever.invoke(question)
    context = "\n\n".join(doc.page_content for doc in retrieved)
    # Mistral-instruct prompt framing the model as a senior scientist.
    prompt = f"""[INST] You are a Senior Research Scientist.
Analyze these papers to find gaps and novelty.
Context: {context}
Task: Identify limitations in these methodologies and suggest a NOVEL approach or gap.
Query: {question} [/INST]"""
    return ask_mistral(prompt)
193
+
194
# MODE 3: BRAINSTORM SETUP
def get_setup_response(question):
    """Design an experimental setup grounded in the reference-paper index."""
    global brainstorm_retriever
    if not brainstorm_retriever:
        return "⚠️ Upload Reference Papers."

    retrieved = brainstorm_retriever.invoke(question)
    context = "\n\n".join(doc.page_content for doc in retrieved)
    # Mistral-instruct prompt framing the model as a research architect.
    prompt = f"""[INST] You are a Research Architect.
Based on the methodologies in the context, design a robust EXPERIMENTAL SETUP.
Context: {context}
Task: Propose Datasets, Evaluation Metrics, Baselines, and Hardware requirements to validate the proposed novelty.
Query: {question} [/INST]"""
    return ask_mistral(prompt)
207
+
208
# MODE 4: GENERATE PAPER DRAFT
def get_draft_response(question):
    """Generate a structured paper draft, using the reference index for related work."""
    global brainstorm_retriever
    if not brainstorm_retriever:
        return "⚠️ Upload Reference Papers."

    retrieved = brainstorm_retriever.invoke(question)
    context = "\n\n".join(doc.page_content for doc in retrieved)
    # Mistral-instruct prompt framing the model as an academic writer.
    prompt = f"""[INST] You are an Academic Writer.
Write a structured Research Paper Draft (Abstract, Introduction, Methodology, Experiments).
Use the context from reference papers to write the 'Related Work' section effectively.
Context: {context}
Task: Generate a draft for a paper about: {question} [/INST]"""
    return ask_mistral(prompt)
221
+
222
+
223
+ # ==========================================
224
+ # 5. GRADIO UI
225
+ # ==========================================
226
def reset_chat():
    """Wipe the chatbot display (does not touch the JSON history log)."""
    return []
227
+
228
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔬 AI Research Scientist Lab (Production Version)")
    gr.Markdown("Pipeline: Analyze -> Find Novelty -> Design Setup -> Write Draft")

    with gr.Row():
        # --- LEFT: CONTROLS ---
        with gr.Column(scale=1):

            mode_radio = gr.Radio(
                choices=[
                    "1. Chat with Paper",
                    "2. Brainstorm Novelty",
                    "3. Brainstorm Setup",
                    "4. Generate Paper Draft"
                ],
                value="1. Chat with Paper",
                label="🧪 Research Stage"
            )

            gr.Markdown("---")
            gr.Markdown("### 📂 Input Data")

            file_main = gr.File(label="Target Paper (Stage 1)", file_types=[".pdf"])
            status_main = gr.Textbox(label="Status", value="Waiting...", interactive=False)

            file_refs = gr.File(label="Reference Papers (Stages 2-4)", file_types=[".pdf"], file_count="multiple")
            status_refs = gr.Textbox(label="Status", value="Waiting...", interactive=False)

            clear_btn = gr.Button("🗑️ Clear Workspace")

        # --- RIGHT: WORKSPACE ---
        with gr.Column(scale=2):
            # type="messages" is required: respond() appends
            # {"role": ..., "content": ...} dicts, which mismatch the
            # Chatbot's default (user, bot) tuple format.
            chatbot = gr.Chatbot(label="Lab Assistant", height=700, type="messages")
            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Dictate Idea")

            with gr.Row():
                msg_input = gr.Textbox(placeholder="Enter your query or research topic...", scale=4)
                send_btn = gr.Button("🚀 Execute", variant="primary", scale=1)

    # --- HANDLERS ---
    file_main.change(fn=process_main_paper, inputs=file_main, outputs=status_main)
    file_refs.change(fn=process_brainstorm_papers, inputs=file_refs, outputs=status_refs)
    audio_input.stop_recording(fn=transcribe_audio, inputs=audio_input, outputs=msg_input)
    clear_btn.click(fn=reset_chat, outputs=chatbot)

    # --- MAIN ROUTER ---
    def respond(message, history, mode):
        """Route the query to the selected stage's handler and append the exchange.

        Fix vs. original: history is normalized BEFORE the empty-message
        early-return, so a blank submit can never hand None back to the Chatbot.
        """
        if history is None:
            history = []
        if not message.strip():
            return "", history

        # Route based on selected Stage
        if mode == "1. Chat with Paper":
            response = get_main_paper_response(message)
        elif mode == "2. Brainstorm Novelty":
            response = get_novelty_response(message)
        elif mode == "3. Brainstorm Setup":
            response = get_setup_response(message)
        elif mode == "4. Generate Paper Draft":
            response = get_draft_response(message)
        else:
            response = "Error: Unknown Mode"

        # Log & Update
        final_ans = f"**[{mode}]**\n{response}"
        save_to_json(message, final_ans, mode)
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": final_ans})

        return "", history

    msg_input.submit(respond, [msg_input, chatbot, mode_radio], [msg_input, chatbot])
    send_btn.click(respond, [msg_input, chatbot, mode_radio], [msg_input, chatbot])

print("🚀 Launching Production Research Scientist Lab...")
# In HF Spaces, we don't need share=True
demo.launch()