Ani14 committed on
Commit
05ea5c0
·
verified ·
1 Parent(s): ce5edf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -58
app.py CHANGED
@@ -12,6 +12,7 @@ from PIL import Image
12
  from io import BytesIO
13
  from fpdf import FPDF
14
  import base64
 
15
  from duckduckgo_search import DDGS
16
 
17
  # --- Load API Keys ---
@@ -22,7 +23,7 @@ tavily = TavilyClient(api_key=TAVILY_API_KEY)
22
 
23
  # --- Helper Functions ---
24
 
25
- def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=20000, temperature=0.7):
26
  url = "https://openrouter.ai/api/v1/chat/completions"
27
  headers = {
28
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
@@ -114,14 +115,6 @@ def extract_year_from_text(text):
114
  years = re.findall(r"\b(19|20)\d{2}\b", text)
115
  return int(years[0]) if years else 9999
116
 
117
- def generate_apa_citation(title, url, source, year=None):
118
- title = title or "Untitled Source"
119
- url = url or "Unavailable URL"
120
- year = year if isinstance(year, int) and 1900 <= year <= datetime.datetime.now().year else "n.d."
121
- source_map = {"arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"}
122
- label = source_map.get(source, "*External Source*")
123
- return f"{title}. ({year}). {label}. {url}"
124
-
125
  def merge_duplicates(entries):
126
  unique = []
127
  seen_titles = []
@@ -148,26 +141,34 @@ def build_chronological_progression(sources):
148
  summary += f"**{year}**\n{entries}\n\n"
149
  return summary.strip()
150
 
151
- # --- Streamlit UI ---
152
- st.set_page_config(page_title="🧠 Deep Research Assistant", layout="centered")
153
 
154
- if "last_report" not in st.session_state:
155
- st.session_state["last_report"] = ""
156
 
 
 
 
 
 
 
 
157
  with st.sidebar:
158
- st.title("🧠 Deep Research Assistant")
159
  topic = st.text_input("πŸ” Enter your research topic")
160
  report_type = st.selectbox("πŸ“„ Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
161
  tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
162
  source_type = st.selectbox("πŸ“š Sources", ["Web Only", "Academic Only", "Hybrid"])
163
- custom_domains = st.text_input("🌐 Optional Web Domains", placeholder="example.com, forbes.com")
164
- research_button = st.button("πŸš€ Run Deep Research")
165
 
 
166
  st.title("πŸ“˜ Research Output")
167
 
168
  if research_button and topic:
169
  try:
170
- with st.status("πŸ” Gathering and analyzing sources..."):
171
  all_sources = []
172
  if source_type in ["Web Only", "Hybrid"]:
173
  all_sources += get_sources(topic, custom_domains) if custom_domains.strip() else get_sources(topic)
@@ -182,67 +183,74 @@ if research_button and topic:
182
  merged = sort_sources_chronologically(merged)
183
  chronological_progress = build_chronological_progression(merged)
184
 
185
- # --- Source Image Previews ---
186
- st.subheader("πŸ–Ό Source Previews")
187
- image_shown = False
188
- cols = st.columns(2)
189
- for i, m in enumerate(merged):
190
- if m.get("image_url"):
191
- with cols[i % 2]:
192
- st.image(m["image_url"], caption=m["title"], use_container_width=True)
193
- image_shown = True
194
- if not image_shown:
195
- st.info("ℹ️ No image previews available.")
196
-
197
- # --- Building Final Prompt ---
198
- citations = [generate_apa_citation(m.get('title'), m.get('url'), m.get('source'), m.get('year')) for m in merged]
199
- combined_text = "\n\n".join([
200
- f"- [{m['title']}]({m['url']}) ({m.get('year', 'n.d.')})\n> {m.get('snippet', m.get('summary', ''))[:300]}..."
201
- for m in merged
202
  ])
 
203
  length_instruction = {
204
- "Summary": "Keep it short and focused under 500 words.",
205
- "Detailed Report": "Around 1000+ words with gaps and suggestions.",
206
- "Thorough Academic Research": "Over 5000+ words, full academic LIKE A RESEARCH PAPER FORMAT OF IEEE"
207
  }[report_type]
208
 
 
 
 
 
 
 
209
  prompt = f"""
210
- You are an expert research assistant.
211
 
212
- Your task is to write a {report_type.lower()} in a {tone.lower()} tone.
 
213
 
214
- πŸ” Use the following structure:
215
- 1. Chronological Mapping: based on provided timeline. you should describe the advancements in the topic, what experiments were conducted and what methods were used and how the progression has been made
216
- 2. Gap Identification: what is missing.
217
- 3. Novel Contribution: suggest improvements.-- Suggest a Novel topic based on the gaps and provide an overview on how a researcher can benefit from the topic
218
- 4. Structured Report: headings, clarity- with all headings and discussions like a proper research paper
219
 
220
- βœ‚οΈ Writing Instruction:
221
- {length_instruction}
222
 
223
- πŸ“œ Research Progress Over Time:
224
- {chronological_progress}
225
 
226
- πŸ“š Sources:
227
- {combined_text}
228
 
229
- πŸ“Ž APA Citations:
230
- {chr(10).join(citations)}
231
- """
232
 
233
- # --- Generate Final Output ---
234
  st.subheader(f"πŸ“ {report_type} on '{topic}'")
235
  output_placeholder = st.empty()
236
- full_output = ""
237
  for chunk in call_llm([{"role": "user", "content": prompt}]):
238
- full_output += chunk
239
- output_placeholder.markdown(full_output, unsafe_allow_html=True)
240
 
241
- st.session_state["last_report"] = full_output
 
242
 
243
  except Exception as e:
244
  st.error(f"❌ Error: {e}")
245
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
  # 🧠 Initialize session state
248
  if "last_report" not in st.session_state:
 
12
  from io import BytesIO
13
  from fpdf import FPDF
14
  import base64
15
+ import uuid
16
  from duckduckgo_search import DDGS
17
 
18
  # --- Load API Keys ---
 
23
 
24
  # --- Helper Functions ---
25
 
26
+ def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=4000, temperature=0.7):
27
  url = "https://openrouter.ai/api/v1/chat/completions"
28
  headers = {
29
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
 
115
  years = re.findall(r"\b(19|20)\d{2}\b", text)
116
  return int(years[0]) if years else 9999
117
 
 
 
 
 
 
 
 
 
118
  def merge_duplicates(entries):
119
  unique = []
120
  seen_titles = []
 
141
  summary += f"**{year}**\n{entries}\n\n"
142
  return summary.strip()
143
 
144
+ # --- Initialize Streamlit Session ---
145
+ st.set_page_config(page_title="🧠 Deep Research Assistant 2.0", layout="centered")
146
 
147
+ if "memory_bank" not in st.session_state:
148
+ st.session_state.memory_bank = []
149
 
150
+ if "chat_threads" not in st.session_state:
151
+ st.session_state.chat_threads = {}
152
+
153
+ if "current_thread_id" not in st.session_state:
154
+ st.session_state.current_thread_id = None
155
+
156
+ # --- Sidebar Inputs ---
157
  with st.sidebar:
158
+ st.title("Deep Research Assistant 2.0 πŸš€")
159
  topic = st.text_input("πŸ” Enter your research topic")
160
  report_type = st.selectbox("πŸ“„ Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
161
  tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
162
  source_type = st.selectbox("πŸ“š Sources", ["Web Only", "Academic Only", "Hybrid"])
163
+ custom_domains = st.text_input("🌐 Optional Domains", placeholder="example.com, nature.com")
164
+ research_button = st.button("πŸš€ Start Research")
165
 
166
+ # --- Main Logic ---
167
  st.title("πŸ“˜ Research Output")
168
 
169
  if research_button and topic:
170
  try:
171
+ with st.status("πŸ”Ž Gathering sources..."):
172
  all_sources = []
173
  if source_type in ["Web Only", "Hybrid"]:
174
  all_sources += get_sources(topic, custom_domains) if custom_domains.strip() else get_sources(topic)
 
183
  merged = sort_sources_chronologically(merged)
184
  chronological_progress = build_chronological_progression(merged)
185
 
186
+ # Prepare previous learnings
187
+ previous_learnings = "\n\n".join(st.session_state.memory_bank[-5:]) # last 5 learnings
188
+
189
+ citations = [f"- {s['title']} ({s['year']}) [{s['source']}]({s['url']})" for s in merged]
190
+ sources_text = "\n".join([
191
+ f"- [{s['title']}]({s['url']}) ({s['year']})\n> {s.get('snippet', s.get('summary', ''))[:300]}..."
192
+ for s in merged
 
 
 
 
 
 
 
 
 
 
193
  ])
194
+
195
  length_instruction = {
196
+ "Summary": "Keep it concise, under 300 words.",
197
+ "Detailed Report": "Write 500-700 words with critical insights.",
198
+ "Thorough Academic Research": "Craft a full academic paper >1000 words."
199
  }[report_type]
200
 
201
+ # Create Thread ID
202
+ thread_id = str(uuid.uuid4())
203
+ st.session_state.current_thread_id = thread_id
204
+ st.session_state.chat_threads[thread_id] = []
205
+
206
+ # --- LLM Prompt ---
207
  prompt = f"""
208
+ You are an expert research assistant.
209
 
210
+ πŸ”΅ Past Knowledge:
211
+ {previous_learnings}
212
 
213
+ πŸ”΅ New Research Topic:
214
+ {topic}
 
 
 
215
 
216
+ πŸ”΅ Writing Style:
217
+ {tone} tone, {length_instruction}
218
 
219
+ πŸ”΅ Research Timeline:
220
+ {chronological_progress}
221
 
222
+ πŸ”΅ Sources:
223
+ {sources_text}
224
 
225
+ πŸ”΅ Citations:
226
+ {chr(10).join(citations)}
227
+ """
228
 
229
+ # --- Generate Report ---
230
  st.subheader(f"πŸ“ {report_type} on '{topic}'")
231
  output_placeholder = st.empty()
232
+ final_output = ""
233
  for chunk in call_llm([{"role": "user", "content": prompt}]):
234
+ final_output += chunk
235
+ output_placeholder.markdown(final_output, unsafe_allow_html=True)
236
 
237
+ st.session_state.memory_bank.append(final_output)
238
+ st.session_state.chat_threads[thread_id].append({"role": "assistant", "content": final_output})
239
 
240
  except Exception as e:
241
  st.error(f"❌ Error: {e}")
242
 
243
+ # --- Show Chat Threads ---
244
+ st.divider()
245
+ st.subheader("πŸ“‚ Your Research Threads")
246
+
247
+ for tid, chats in st.session_state.chat_threads.items():
248
+ with st.expander(f"Thread {tid[:8]}"):
249
+ for msg in chats:
250
+ role = "πŸ§‘ You" if msg['role'] == 'user' else "πŸ€– Assistant"
251
+ st.markdown(f"**{role}:** {msg['content']}")
252
+
253
+
254
 
255
  # 🧠 Initialize session state
256
  if "last_report" not in st.session_state: