Ani14 committed
Commit 6407974 · verified · 1 Parent(s): 91282b4

Update app.py

Files changed (1):
  1. app.py +83 -80
app.py CHANGED
@@ -53,10 +53,6 @@ def get_sources(topic, domains=None):
         })
     return sources
 
-def get_images(topic):
-    response = tavily.image_search(query=topic, max_results=5)
-    return response.get("images", [])
-
 def get_arxiv_papers(query):
     from urllib.parse import quote_plus
     url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results=5"
@@ -103,18 +99,18 @@ def merge_duplicates(entries):
     return unique
 
 def generate_pdf(text):
-    text = remove_invalid_unicode(text)
     pdf = FPDF()
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.set_font("Arial", size=12)
     for line in text.split('\n'):
         pdf.multi_cell(0, 10, line)
-    pdf_bytes = pdf.output(dest='S').encode('latin1')
-    return BytesIO(pdf_bytes)
+    pdf_output = BytesIO()
+    pdf.output(pdf_output)
+    pdf_output.seek(0)
+    return pdf_output
 
 def generate_latex(text):
-    text = remove_invalid_unicode(text)
     latex = "\\documentclass{article}\n\\usepackage{hyperref}\n\\begin{document}\n"
     for line in text.split('\n'):
         latex += line.replace('_', '\\_') + "\\\\\n"
@@ -124,114 +120,121 @@ def generate_latex(text):
 def generate_download_button(file, label, mime_type):
     b64 = base64.b64encode(file.read()).decode()
     return f"""
-    <a href=\"data:{mime_type};base64,{b64}\" download=\"{label}\">📥 Download {label}</a>
+    <a href=\"data:{mime_type};base64,{b64}\" download=\"{label}\">
+    📥 Download {label}
+    </a>
     """
 
 # --- Streamlit UI ---
 st.set_page_config("Deep Research Bot", layout="wide")
 
 with st.sidebar:
-    st.title("🧠 Deep Research Assistant")
-    topic = st.text_input("💡 Topic to research")
-    report_type = st.selectbox("📄 Type of report", [
+    st.title("Deep Research Assistant")
+    topic = st.text_input("Topic to research")
+    report_type = st.selectbox("Type of report", [
         "Summary - Short and fast (~2 min)",
         "Detailed Report (~5 min)",
         "Thorough Academic Research (~10 min)"
     ])
-    tone = st.selectbox("🎯 Tone of the report", [
+    tone = st.selectbox("Tone of the report", [
         "Objective - Impartial and unbiased presentation of facts and findings",
         "Persuasive - Advocating a specific point of view",
         "Narrative - Storytelling tone for layperson readers"
     ])
-    source_type = st.selectbox("🌐 Sources to include", ["Web Only", "Academic Only", "Hybrid"])
-    custom_domains = st.text_input("🔍 Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")
+    source_type = st.selectbox("Sources to include", ["Web Only", "Academic Only", "Hybrid"])
+    custom_domains = st.text_input("Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")
     research_button = st.button("Research")
 
-st.title("📑 Research Output")
+st.title("Research Output")
 
 if research_button and topic:
     try:
-        with st.status("🔍 Gathering data..."):
+        with st.status("Gathering data..."):
             st.info("Fetching from sources...")
 
-            images = get_images(topic)
-
+        all_sources = []
+        citations = []
+
+        if source_type in ["Web Only", "Hybrid"]:
+            web_data = get_sources(topic, custom_domains)
+            for item in web_data:
+                all_sources.append(item | {"source": "web"})
+
+        if source_type in ["Academic Only", "Hybrid"]:
+            arxiv_data = get_arxiv_papers(topic)
+            for item in arxiv_data:
+                all_sources.append(item | {"source": "arxiv"})
+            semantic_data = get_semantic_papers(topic)
+            for item in semantic_data:
+                all_sources.append(item | {"source": "semantic"})
+
+        merged = merge_duplicates(all_sources)
+        combined_text = ""
+        for m in merged:
+            combined_text += f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}...\n\n"
+            citations.append(generate_apa_citation(m['title'], m['url'], m['source']))
+
+        with st.spinner("Synthesizing report..."):
            if report_type == "Summary - Short and fast (~2 min)":
                prompt = f"""
-                # Research Summary
-                Topic: {topic}
                Tone: {tone}
-                Type: Summary Only
-                Write a clear and concise summary overview of the topic. No detailed sections. Academic tone, short and informative.
+                # Topic Overview: {topic}
+                Provide a concise and informative summary or overview of the topic based on the available sources.
+                Sources:
+                {combined_text}
+                """
+            elif report_type == "Detailed Report (~5 min)":
+                prompt = f"""
+                # Research Topic: {topic}
+                Tone: {tone}
+                Type: {report_type}
+                Sources:
+                {combined_text}
+                Write a detailed research brief including:
+                1. Introduction
+                2. Research Gap
+                3. Novel Insight
+                4. Application
+                5. Citations
                """
-                final_output = call_llm([{"role": "user", "content": prompt}])
-                st.markdown(f"### 📄 {report_type}")
-                st.markdown(final_output)
-
-                if images:
-                    st.markdown("### 🖼️ Related Images")
-                    for img_url in images:
-                        st.image(img_url, width=300)
-
            else:
-                all_sources = []
-                citations = []
-
-                if source_type in ["Web Only", "Hybrid"]:
-                    web_data = get_sources(topic, custom_domains)
-                    for item in web_data:
-                        all_sources.append(item | {"source": "web"})
-
-                if source_type in ["Academic Only", "Hybrid"]:
-                    arxiv_data = get_arxiv_papers(topic)
-                    for item in arxiv_data:
-                        all_sources.append(item | {"source": "arxiv"})
-                    semantic_data = get_semantic_papers(topic)
-                    for item in semantic_data:
-                        all_sources.append(item | {"source": "semantic"})
-
-                merged = merge_duplicates(all_sources)
-                combined_text = ""
-                for m in merged:
-                    combined_text += f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}...\n\n"
-                    citations.append(generate_apa_citation(m['title'], m['url'], m['source']))
-
                prompt = f"""
                # Research Topic: {topic}
                Tone: {tone}
                Type: {report_type}
                Sources:
                {combined_text}
-                Write the report in academic markdown with paragraphs (use bullet points only when necessary). Include:
+                Write a comprehensive academic paper including:
                1. Introduction
                2. Research Gap
                3. Novel Insight
                4. Application
-                5. Full Academic Writeup if Thorough Report
+                5. Full Academic Writeup
+                6. Citations in APA format
                """
-                final_output = call_llm([{"role": "user", "content": prompt}])
-
-                st.markdown(f"### 📄 {report_type}")
-                st.markdown(final_output, unsafe_allow_html=True)
-
-                st.markdown("### 📚 Citations (APA Format)")
-                for cite in citations:
-                    st.markdown(f"- {cite}")
-
-                if report_type == "Thorough Academic Research (~10 min)":
-                    with st.spinner("📦 Preparing PDF and LaTeX..."):
-                        pdf_file = generate_pdf(final_output)
-                        latex_file = generate_latex(final_output)
-                        st.markdown(generate_download_button(pdf_file, "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
-                        st.markdown(generate_download_button(latex_file, "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)
-
-                overlaps = check_plagiarism(final_output, topic)
-                if overlaps:
-                    st.warning("⚠️ Potential overlaps detected:")
-                    for hit in overlaps:
-                        st.markdown(f"- [{hit['title']}]({hit['url']})")
-                else:
-                    st.success("✅ No major overlaps found.")
+            final_output = call_llm([{"role": "user", "content": prompt}])
+
+            st.markdown(f"### {report_type}")
+            st.markdown(final_output, unsafe_allow_html=True)
+
+            st.markdown("### Citations (APA Format)")
+            for cite in citations:
+                st.markdown(f"- {cite}")
+
+            if report_type == "Thorough Academic Research (~10 min)":
+                with st.spinner("Preparing PDF and LaTeX..."):
+                    pdf_file = generate_pdf(final_output)
+                    latex_file = generate_latex(final_output)
+                    st.markdown(generate_download_button(pdf_file, "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
+                    st.markdown(generate_download_button(latex_file, "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)
+
+            overlaps = check_plagiarism(final_output, topic)
+            if overlaps:
+                st.warning("Potential overlaps detected:")
+                for hit in overlaps:
+                    st.markdown(f"- [{hit['title']}]({hit['url']})")
+            else:
+                st.success("No major overlaps found.")
 
     except Exception as e:
-        st.error(f"Error: {e}")
+        st.error(f"Error: {e}")
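
Two small reference notes on the code paths touched above. First, the source tagging (`all_sources.append(item | {"source": "web"})`) relies on the dict union operator from PEP 584, so the app needs Python 3.9 or newer. A minimal sketch of the behaviour (variable names here are illustrative, not taken from app.py):

```python
# Dict union (PEP 584, Python 3.9+): builds a new dict; the right-hand side wins on key clashes.
item = {"title": "Example paper", "url": "https://example.com"}
tagged = item | {"source": "web"}
print(tagged)            # {'title': 'Example paper', 'url': 'https://example.com', 'source': 'web'}
print("source" in item)  # False - the original dict is left untouched
```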
 
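Second, `generate_download_button()` inlines the whole file into a base64 data URI on the `<a>` tag. A standalone sketch of the same pattern, assuming only `base64` and `io` from the standard library (the helper name below is hypothetical, not part of app.py):

```python
import base64
from io import BytesIO

def make_download_link(buffer: BytesIO, label: str, mime_type: str) -> str:
    # Read the in-memory file and embed it as a base64 data URI, as generate_download_button does.
    b64 = base64.b64encode(buffer.read()).decode()
    return f'<a href="data:{mime_type};base64,{b64}" download="{label}">Download {label}</a>'

link = make_download_link(BytesIO(b"%PDF-1.4 minimal example"), "Research_Report.pdf", "application/pdf")
print(link[:80] + "...")
```

Note that the buffer is consumed by `.read()`, which is why the reworked `generate_pdf()` rewinds with `seek(0)` before the buffer is handed to this helper.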