Ani14 committed on
Commit
bcae0c1
·
verified ·
1 Parent(s): 81824c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -96
app.py CHANGED
@@ -12,51 +12,15 @@ from fpdf import FPDF
12
  from io import BytesIO
13
  import base64
14
  from duckduckgo_search import DDGS
 
15
 
16
- # --- ENV & API KEYS ---
17
  load_dotenv()
18
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
19
  TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
20
  tavily = TavilyClient(api_key=TAVILY_API_KEY)
21
 
22
- # --- Streamlit UI ---
23
- st.set_page_config("Deep Research Assistant", layout="centered")
24
-
25
- # --- Mermaid.js for Mind Map ---
26
- st.markdown("""
27
- <script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
28
- <script>
29
- mermaid.initialize({ startOnLoad: true });
30
- </script>
31
- """, unsafe_allow_html=True)
32
-
33
- # --- Theme ---
34
- st.markdown("""
35
- <style>
36
- .stApp { background-color: #0f172a; color: white; }
37
- h1, h2, h3 { color: #facc15; }
38
- </style>
39
- """, unsafe_allow_html=True)
40
-
41
- # --- Session State Initialization ---
42
- if "last_report" not in st.session_state:
43
- st.session_state.last_report = ""
44
- if "mindmap_triggered" not in st.session_state:
45
- st.session_state.mindmap_triggered = False
46
- if "followup_question" not in st.session_state:
47
- st.session_state.followup_question = ""
48
-
49
- # --- Sidebar Inputs ---
50
- with st.sidebar:
51
- st.title("🧠 Deep Research Assistant")
52
- topic = st.text_input("πŸ” Enter your research topic")
53
- report_type = st.selectbox("πŸ“„ Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
54
- tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
55
- source_type = st.selectbox("πŸ“š Sources", ["Web Only", "Academic Only", "Hybrid"])
56
- custom_domains = st.text_input("🌐 Optional Web Domains", placeholder="example.com, forbes.com")
57
- research_button = st.button("πŸš€ Run Deep Research")
58
-
59
- # --- LLM Call ---
60
  def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=3500, temperature=0.7):
61
  url = "https://openrouter.ai/api/v1/chat/completions"
62
  headers = {
@@ -87,15 +51,69 @@ def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=3
87
  except json.JSONDecodeError:
88
  pass
89
 
90
- # --- Source + PDF Helpers ---
91
- def get_sources(query):
 
 
 
92
  response = tavily.search(query=query, search_depth="advanced", max_results=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  return [{
94
- "title": r["title"],
95
- "url": r["url"],
96
- "snippet": r.get("content", ""),
97
- "source": "web"
98
- } for r in response.get("results", [])]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  def generate_pdf(text):
101
  pdf = FPDF()
@@ -116,47 +134,74 @@ def generate_pdf(text):
116
 
117
  def generate_download_button(file, label, mime_type):
118
  b64 = base64.b64encode(file.read()).decode()
119
- return f"""
120
- <a href="data:{mime_type};base64,{b64}" download="{label}">
121
- πŸ“₯ Download {label}
122
- </a>
123
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- # --- Output Area ---
126
  st.title("πŸ“˜ Research Output")
127
 
128
  if research_button and topic:
129
- with st.status("πŸ” Researching..."):
130
- sources = get_sources(topic)
131
- citations = [f"{s['title']} ({datetime.datetime.now().year}). {s['source']}. {s['url']}" for s in sources]
132
- source_text = "\n\n".join([f"- [{s['title']}]({s['url']})\n> {s['snippet']}" for s in sources])
133
-
134
- prompt = f"""
 
 
 
 
 
 
135
  You are an expert research assistant.
136
  1. Analyze the following sources.
137
  2. Identify research gaps and propose a novel topic.
138
  3. Write a {report_type.lower()} in a {tone.lower()} tone.
139
 
140
  Sources:
141
- {source_text}
142
 
143
- Citations:
144
  {chr(10).join(citations)}
145
- """
146
 
147
- st.subheader(f"πŸ“ {report_type} on '{topic}'")
148
- output_placeholder = st.empty()
149
- final_output = ""
150
- for chunk in call_llm([{"role": "user", "content": prompt}]):
151
- final_output += chunk
152
- output_placeholder.markdown(final_output, unsafe_allow_html=True)
153
 
154
- st.session_state.last_report = final_output
155
 
156
- st.subheader("πŸ“„ Downloads")
157
- st.markdown(generate_download_button(generate_pdf(final_output), "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
158
 
159
- # --- Mind Map ---
160
  st.subheader("🧭 Visual Mind Map")
161
  if st.button("πŸ—Ί Generate Mind Map"):
162
  st.session_state.mindmap_triggered = True
@@ -164,35 +209,18 @@ if st.button("πŸ—Ί Generate Mind Map"):
164
  if st.session_state.mindmap_triggered and st.session_state.last_report:
165
  try:
166
  mindmap_prompt = [
167
- {"role": "system", "content": "Convert this research report into a mermaid.js mind map."},
168
  {"role": "user", "content": st.session_state.last_report}
169
  ]
170
  mindmap_code = ""
171
  for chunk in call_llm(mindmap_prompt):
172
  mindmap_code += chunk
173
- mindmap_code = mindmap_code.replace("```mermaid", "").replace("```", "").strip()
174
- st.markdown(f"<div class='mermaid'>{mindmap_code}</div>", unsafe_allow_html=True)
175
- except Exception as e:
176
- st.error(f"❌ Mind map error: {e}")
 
 
 
177
  finally:
178
  st.session_state.mindmap_triggered = False
179
-
180
- # --- Follow-Up ---
181
- st.subheader("πŸ’¬ Ask a Follow-Up")
182
- follow_input = st.text_input("Ask a question about the report:")
183
- if st.button("πŸ”„ Submit Follow-Up") and follow_input:
184
- st.session_state.followup_question = follow_input
185
-
186
- if st.session_state.followup_question and st.session_state.last_report:
187
- follow_prompt = [
188
- {"role": "system", "content": "You are a helpful academic assistant."},
189
- {"role": "user", "content": st.session_state.last_report},
190
- {"role": "user", "content": st.session_state.followup_question}
191
- ]
192
- follow_output = ""
193
- follow_box = st.empty()
194
- for chunk in call_llm(follow_prompt):
195
- follow_output += chunk
196
- follow_box.markdown(follow_output, unsafe_allow_html=True)
197
-
198
- st.session_state.followup_question = ""
 
12
  from io import BytesIO
13
  import base64
14
  from duckduckgo_search import DDGS
15
+ import re
16
 
17
+ # Load environment variables
18
  load_dotenv()
19
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
20
  TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
21
  tavily = TavilyClient(api_key=TAVILY_API_KEY)
22
 
23
+ # --- Helper Functions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=3500, temperature=0.7):
25
  url = "https://openrouter.ai/api/v1/chat/completions"
26
  headers = {
 
51
  except json.JSONDecodeError:
52
  pass
53
 
def get_sources(topic, domains=None):
    """Search the web for *topic* via Tavily and return normalized source dicts.

    Args:
        topic: Free-text research topic used as the base search query.
        domains: Optional comma-separated domain list (for example
            ``"example.com, forbes.com"``). Every non-empty entry is appended
            to the query as a ``site:`` filter, joined with ``OR``.

    Returns:
        A list of dicts with the keys ``title``, ``url``, ``snippet``,
        ``image_url`` and ``source`` (always ``"web"``).
    """
    query = topic
    if domains:
        domain_filters = [d.strip() for d in domains.split(",") if d.strip()]
        # Input such as "," or " , " yields no usable domain; skip the filter
        # rather than sending a dangling "site:" operator to the search API.
        if domain_filters:
            query += " site:" + " OR site:".join(domain_filters)

    response = tavily.search(query=query, search_depth="advanced", max_results=10)

    results = []
    for r in response.get("results", []):
        image_url = r.get("image_url")
        if not image_url:
            # Best-effort thumbnail lookup: any failure here only means the
            # source is listed without an image.
            try:
                images = [img["image"] for img in DDGS().images(r["title"], max_results=1)]
                image_url = images[0] if images else None
            except Exception:
                image_url = None
        results.append({
            "title": r["title"],
            "url": r["url"],
            "snippet": r.get("content", ""),
            "image_url": image_url,
            "source": "web",
        })
    return results
77
+
def get_arxiv_papers(query):
    """Query the arXiv export API for *query* and return up to five papers.

    Each returned dict carries ``title``, ``summary`` (newlines flattened to
    spaces), ``url`` (the entry's PDF link, or ``""`` when none is listed)
    and ``source`` set to ``"arxiv"``.
    """
    from urllib.parse import quote_plus

    def _pdf_href(entry):
        # First link advertised as a PDF wins; empty string when none exists.
        for link in entry.links:
            if link.type == "application/pdf":
                return link.href
        return ""

    encoded_query = quote_plus(query)
    url = f"http://export.arxiv.org/api/query?search_query=all:{encoded_query}&start=0&max_results=5"
    feed = feedparser.parse(url)

    papers = []
    for entry in feed.entries:
        papers.append({
            "title": entry.title,
            "summary": entry.summary.replace("\n", " ").strip(),
            "url": _pdf_href(entry),
            "source": "arxiv",
        })
    return papers
88
+
def get_semantic_papers(query):
    """Search Semantic Scholar for *query* and return up to five papers.

    Returns:
        A list of dicts with the keys ``title``, ``summary``, ``url`` and
        ``source`` (always ``"semantic"``). An empty list is returned when
        the request fails or the response cannot be decoded, so callers can
        treat this source as optional.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": 5, "fields": "title,abstract,url"}
    try:
        # A timeout keeps a stalled API call from hanging the whole app run.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        papers = response.json().get("data") or []
    except (requests.RequestException, ValueError):
        return []

    return [{
        "title": p.get("title"),
        # dict.get() only applies its default when the key is missing; a key
        # present with a null value would come back as None and break later
        # string slicing, so fall back with `or` instead.
        "summary": p.get("abstract") or "No abstract available",
        "url": p.get("url"),
        "source": "semantic",
    } for p in papers]
103
+
def generate_apa_citation(title, url, source, year=None):
    """Format a simple APA-style citation line.

    Args:
        title: Title of the cited work.
        url: Link to the cited work.
        source: Source tag; ``"arxiv"``, ``"semantic"`` and ``"web"`` map to
            dedicated labels, anything else falls back to ``*Web*``.
        year: Optional publication year. When omitted, the current year is
            used, which matches the previous behaviour.

    Returns:
        A string of the form ``"<title>. (<year>). <label>. <url>"``.
    """
    if year is None:
        year = datetime.datetime.now().year
    labels = {
        "arxiv": "*arXiv*",
        "semantic": "*Semantic Scholar*",
        "web": "*Web Source*",
    }
    label = labels.get(source, "*Web*")
    return f"{title}. ({year}). {label}. {url}"
108
+
def merge_duplicates(entries):
    """Drop entries whose title closely matches one that was already kept.

    Entries are scanned in order; an entry is kept only if its title scores
    below 90 on ``fuzz.token_set_ratio`` against every previously kept title.
    The first occurrence of each near-duplicate group is the one retained.
    """
    kept = []
    kept_titles = []
    for candidate in entries:
        title = candidate['title']
        is_duplicate = any(
            fuzz.token_set_ratio(title, earlier) >= 90 for earlier in kept_titles
        )
        if is_duplicate:
            continue
        kept.append(candidate)
        kept_titles.append(title)
    return kept
117
 
118
  def generate_pdf(text):
119
  pdf = FPDF()
 
134
 
def generate_download_button(file, label, mime_type):
    """Build an HTML download link that embeds *file* as a base64 data URI.

    Args:
        file: Binary file-like object; it is read from its current position.
        label: File name offered to the browser and shown in the link text.
        mime_type: MIME type placed in the data URI.

    Returns:
        An ``<a>`` element as a string, intended for rendering with
        ``unsafe_allow_html=True``.
    """
    import html

    b64 = base64.b64encode(file.read()).decode()
    # Escape the interpolated values so a quote or angle bracket in the label
    # cannot terminate the attribute or inject markup.
    safe_label = html.escape(label, quote=True)
    safe_mime = html.escape(mime_type, quote=True)
    return f"""<a href="data:{safe_mime};base64,{b64}" download="{safe_label}">📥 Download {safe_label}</a>"""
138
+
# --- Streamlit UI ---
st.set_page_config("Deep Research Assistant", layout="centered")

# Seed the per-session values once so later reads work on a fresh session.
if "last_report" not in st.session_state:
    st.session_state.last_report = ""
if "mindmap_triggered" not in st.session_state:
    st.session_state.mindmap_triggered = False

# Mermaid loader for the mind map, plus the dark theme overrides.
# NOTE(review): <script> tags injected through st.markdown may not be executed
# by the browser; confirm the mind map actually renders, and consider an HTML
# component if it does not.
st.markdown("""
<script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
<script>mermaid.initialize({ startOnLoad: true });</script>
<style>
.stApp { background-color: #0f172a; color: white; }
h1, h2, h3 { color: #facc15; }
</style>
""", unsafe_allow_html=True)

# Sidebar: every research parameter is collected here and read further down.
with st.sidebar:
    st.title("🧠 Deep Research Assistant")
    topic = st.text_input("🔍 Enter your research topic")
    report_type = st.selectbox("📄 Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
    tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
    source_type = st.selectbox("📚 Sources", ["Web Only", "Academic Only", "Hybrid"])
    custom_domains = st.text_input("🌐 Optional Web Domains", placeholder="example.com, forbes.com")
    research_button = st.button("🚀 Run Deep Research")
165
 
 
st.title("📘 Research Output")

if research_button and topic:
    # Gather sources according to the selected source mix.
    sources = []
    if source_type in ["Web Only", "Hybrid"]:
        sources += get_sources(topic, custom_domains)
    if source_type in ["Academic Only", "Hybrid"]:
        sources += get_arxiv_papers(topic)
        sources += get_semantic_papers(topic)

    merged = merge_duplicates(sources)
    citations = [generate_apa_citation(m['title'], m['url'], m['source']) for m in merged]

    source_lines = []
    for m in merged:
        # Web results carry "snippet", academic results carry "summary"; either
        # may be missing or None, so fall back to an empty excerpt before slicing.
        excerpt = (m.get('snippet') or m.get('summary') or '')[:300]
        source_lines.append(f"- [{m['title']}]({m['url']})\n> {excerpt}...")
    combined_text = "\n\n".join(source_lines)

    prompt = f"""
You are an expert research assistant.
1. Analyze the following sources.
2. Identify research gaps and propose a novel topic.
3. Write a {report_type.lower()} in a {tone.lower()} tone.

Sources:
{combined_text}

APA Citations:
{chr(10).join(citations)}
"""

    st.subheader(f"📝 {report_type} on '{topic}'")
    # Stream into a single placeholder so each chunk updates the same element
    # instead of adding another copy of the accumulated text to the page.
    output_placeholder = st.empty()
    full_output = ""
    for chunk in call_llm([{"role": "user", "content": prompt}]):
        full_output += chunk
        output_placeholder.markdown(full_output, unsafe_allow_html=True)

    # Kept for the mind-map step, which runs on a later rerun.
    st.session_state.last_report = full_output

    st.subheader("📄 Downloads")
    st.markdown(generate_download_button(generate_pdf(full_output), "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
203
 
# --- Mind Map Section ---
st.subheader("🧭 Visual Mind Map")
# The click is recorded in session state so the generation step can act on it.
if st.button("🗺 Generate Mind Map"):
    st.session_state.mindmap_triggered = True
  st.session_state.mindmap_triggered = True
 
# Generate and render the mind map for the stored report when requested.
if st.session_state.mindmap_triggered and st.session_state.last_report:
    try:
        mindmap_prompt = [
            {"role": "system", "content": "You are a mermaid.js expert. Convert the given research report into a valid mermaid.js mind map. Only return the code between ```mermaid and ```."},
            {"role": "user", "content": st.session_state.last_report}
        ]
        mindmap_code = "".join(call_llm(mindmap_prompt))

        # Keep only the fenced mermaid block; anything around it is model chatter.
        match = re.search(r"```mermaid(.*?)```", mindmap_code, re.DOTALL)
        if match:
            diagram = match.group(1).strip()
            st.markdown(f"<div class='mermaid'>{diagram}</div>", unsafe_allow_html=True)
        else:
            st.warning("⚠️ Mermaid diagram not detected. Try again.")
    except Exception as e:
        # Report LLM/network failures in the page instead of a raw traceback.
        st.error(f"❌ Mind map error: {e}")
    finally:
        # Always clear the flag so a failed attempt is not retried on every rerun.
        st.session_state.mindmap_triggered = False