Ani14 committed on
Commit
65eddf9
·
verified ·
1 Parent(s): 83559fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -140
app.py CHANGED
@@ -106,11 +106,19 @@ def generate_pdf(text):
106
  pdf = FPDF()
107
  pdf.add_page()
108
  pdf.set_auto_page_break(auto=True, margin=15)
109
- pdf.set_font("Arial", size=12)
110
- for line in text.split('\n'):
111
- pdf.multi_cell(0, 10, line)
112
- pdf_output = BytesIO()
113
- pdf.output(pdf_output)
 
 
 
 
 
 
 
 
114
  pdf_output.seek(0)
115
  return pdf_output
116
 
@@ -132,138 +140,3 @@ def download_image_as_bytes(url):
132
  if response.status_code == 200:
133
  return BytesIO(response.content)
134
  return None
135
-
136
- # --- Streamlit UI ---
137
- st.set_page_config("Deep Research Bot", layout="wide")
138
-
139
- with st.sidebar:
140
- st.title("Deep Research Assistant")
141
- topic = st.text_input("Topic to research")
142
- report_type = st.selectbox("Type of report", [
143
- "Summary - Short and fast (~2 min)",
144
- "Detailed Report (~5 min)",
145
- "Thorough Academic Research (~10 min)"
146
- ])
147
- tone = st.selectbox("Tone of the report", [
148
- "Objective - Impartial and unbiased presentation of facts and findings",
149
- "Persuasive - Advocating a specific point of view",
150
- "Narrative - Storytelling tone for layperson readers"
151
- ])
152
- source_type = st.selectbox("Sources to include", ["Web Only", "Academic Only", "Hybrid"])
153
- custom_domains = st.text_input("Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")
154
- research_button = st.button("Research")
155
-
156
- st.title("Research Output")
157
-
158
- if research_button and topic:
159
- try:
160
- with st.status("Gathering data..."):
161
- st.info("Fetching from sources...")
162
-
163
- all_sources = []
164
- citations = []
165
-
166
- if source_type in ["Web Only", "Hybrid"]:
167
- web_data = get_sources(topic, custom_domains)
168
- for item in web_data:
169
- all_sources.append(item | {"source": "web"})
170
-
171
- if source_type in ["Academic Only", "Hybrid"]:
172
- arxiv_data = get_arxiv_papers(topic)
173
- for item in arxiv_data:
174
- all_sources.append(item | {"source": "arxiv"})
175
- semantic_data = get_semantic_papers(topic)
176
- for item in semantic_data:
177
- all_sources.append(item | {"source": "semantic"})
178
-
179
- merged = merge_duplicates(all_sources)
180
- combined_text = ""
181
- for m in merged:
182
- combined_text += f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}...\n\n"
183
- citations.append(generate_apa_citation(m['title'], m['url'], m['source']))
184
-
185
- with st.spinner("Synthesizing report..."):
186
- if "Summary" in report_type:
187
- prompt = f"""
188
- # Topic Overview: {topic}
189
- Tone: {tone}
190
- Sources:
191
- {combined_text}
192
- Write a brief summary that introduces the topic, key findings, and general importance. Use markdown.
193
- """
194
- elif "Detailed Report" in report_type:
195
- prompt = f"""
196
- # Research Topic: {topic}
197
- Tone: {tone}
198
- Sources:
199
- {combined_text}
200
- Write a structured report in markdown including:
201
- 1. Introduction
202
- 2. Research Gap
203
- 3. Novel Insight
204
- 4. Path Forward to Bridge the Research Gap
205
- 5. Citations
206
- """
207
- else:
208
- prompt = f"""
209
- # Thorough Academic Research Paper
210
- Topic: {topic}
211
- Tone: {tone}
212
- Sources:
213
- {combined_text}
214
- Write a detailed research paper in academic markdown with these sections:
215
- 1. Abstract
216
- 2. Introduction
217
- 3. Literature Review
218
- 4. Research Gap
219
- 5. Proposed Methodology or Novel Insight
220
- 6. Applications and Implications
221
- 7. Conclusion
222
- 8. References in APA format
223
- Also, suggest 1-2 relevant open-license images and include their links.
224
- """
225
-
226
- final_output = call_llm([{"role": "user", "content": prompt}])
227
-
228
- st.markdown(f"### {report_type}")
229
- st.markdown(final_output, unsafe_allow_html=True)
230
-
231
- with st.spinner("Preparing PDF and LaTeX..."):
232
- pdf_file = generate_pdf(final_output)
233
- latex_file = generate_latex(final_output)
234
- st.markdown(generate_download_button(pdf_file, "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
235
- st.markdown(generate_download_button(latex_file, "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)
236
-
237
- st.markdown("### Citations (APA Format)")
238
- for cite in citations:
239
- st.markdown(f"- {cite}")
240
-
241
- st.markdown("### Topic-Related Images")
242
- images = get_images(topic)
243
- if images:
244
- cols = st.columns(len(images))
245
- for i, img in enumerate(images):
246
- with cols[i]:
247
- try:
248
- image_bytes = requests.get(img["url"]).content
249
- st.image(image_bytes, caption=img.get("title", "Related Image"), use_column_width=True)
250
- image_data = download_image_as_bytes(img["url"])
251
- if image_data:
252
- b64_img = base64.b64encode(image_data.read()).decode()
253
- href = f'<a href="data:image/jpeg;base64,{b64_img}" download="image_{i+1}.jpg">Download</a>'
254
- st.markdown(href, unsafe_allow_html=True)
255
- except Exception as e:
256
- st.warning(f"Could not load image: {e}")
257
- else:
258
- st.info("No related images found.")
259
-
260
- overlaps = check_plagiarism(final_output, topic)
261
- if overlaps:
262
- st.warning("Potential overlaps detected:")
263
- for hit in overlaps:
264
- st.markdown(f"- [{hit['title']}]({hit['url']})")
265
- else:
266
- st.success("No major overlaps found.")
267
-
268
- except Exception as e:
269
- st.error(f"Error: {e}")
 
106
  pdf = FPDF()
107
  pdf.add_page()
108
  pdf.set_auto_page_break(auto=True, margin=15)
109
+ lines = text.split('\n')
110
+ for line in lines:
111
+ if line.startswith("# "):
112
+ pdf.set_font("Arial", style="B", size=16)
113
+ pdf.multi_cell(0, 10, line[2:])
114
+ elif line.startswith("## "):
115
+ pdf.set_font("Arial", style="B", size=14)
116
+ pdf.multi_cell(0, 10, line[3:])
117
+ else:
118
+ pdf.set_font("Arial", size=12)
119
+ pdf.multi_cell(0, 8, line)
120
+ pdf_bytes = pdf.output(dest='S').encode('latin-1')
121
+ pdf_output = BytesIO(pdf_bytes)
122
  pdf_output.seek(0)
123
  return pdf_output
124
 
 
140
  if response.status_code == 200:
141
  return BytesIO(response.content)
142
  return None