Ani14 committed on
Commit
5c479dc
Β·
verified Β·
1 Parent(s): f8a354f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -132
app.py CHANGED
@@ -5,7 +5,6 @@ import datetime
5
  from dotenv import load_dotenv
6
  from tavily import TavilyClient
7
  import feedparser
8
- import time
9
  from fuzzywuzzy import fuzz
10
  from PIL import Image
11
  from io import BytesIO
@@ -44,14 +43,12 @@ def get_sources(topic, domains=None):
44
  domain_filters = [d.strip() for d in domains.split(",") if d.strip()]
45
  query += " site:" + " OR site:".join(domain_filters)
46
  response = tavily.search(query=query, search_depth="advanced", max_results=10)
47
- sources = []
48
- for item in response.get("results", []):
49
- sources.append({
50
- "title": item.get("title"),
51
- "url": item.get("url"),
52
- "snippet": item.get("content", "")
53
- })
54
- return sources
55
 
56
  def get_arxiv_papers(query):
57
  from urllib.parse import quote_plus
@@ -60,19 +57,24 @@ def get_arxiv_papers(query):
60
  return [{
61
  "title": e.title,
62
  "summary": e.summary.replace("\n", " ").strip(),
63
- "url": next((l.href for l in e.links if l.type == "application/pdf"), "")
 
64
  } for e in feed.entries]
65
 
66
def get_semantic_papers(query):
    """Search Semantic Scholar for up to 5 papers matching *query*.

    Returns a list of dicts with ``title``, ``summary`` and ``url`` keys.
    Network or HTTP errors propagate as ``requests`` exceptions to the caller.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": 5, "fields": "title,abstract,url"}
    # FIX: requests has no default timeout; without one a stalled API call
    # hangs the whole Streamlit app indefinitely.
    response = requests.get(url, params=params, timeout=10)
    papers = response.json().get("data", [])
    return [{
        "title": p.get("title"),
        # FIX: the API can return "abstract": null; dict.get's default only
        # applies when the key is absent, so use `or` to guarantee a string.
        "summary": p.get("abstract") or "No abstract available",
        "url": p.get("url"),
    } for p in papers]
 
 
 
 
76
 
77
  def check_plagiarism(text, topic):
78
  hits = []
@@ -84,9 +86,7 @@ def check_plagiarism(text, topic):
84
 
85
def generate_apa_citation(title, url, source):
    """Return a minimal APA-style citation line for one source entry.

    ``source`` selects the venue label; unknown values fall back to *Web*.
    """
    source_labels = {
        "arxiv": "*arXiv*",
        "semantic": "*Semantic Scholar*",
        "web": "*Web Source*",
    }
    venue = source_labels.get(source, "*Web*")
    current_year = datetime.datetime.now().year
    return f"{title}. ({current_year}). {venue}. {url}"
91
 
92
  def merge_duplicates(entries):
@@ -126,143 +126,69 @@ def generate_download_button(file, label, mime_type):
126
  """
127
 
128
# --- Streamlit UI ---
st.set_page_config("Deep Research Bot", layout="wide", initial_sidebar_state="expanded")

# Dark-navy theme overrides for the default Streamlit widgets.
st.markdown("""
<style>
body {
    background-color: #1e2a38;
    color: #ffffff;
}

.stApp {
    background-color: #1e2a38;
    color: #ffffff;
}

/* Text fields, dropdowns, and inputs */
.stTextInput > div > div > input,
.stSelectbox > div > div > div > div {
    background-color: #ffffff;
    color: #1e2a38;
}

/* Fix labels in sidebar to show on dark background */
.stSidebar label,
.stTextInput label,
.stSelectbox label,
.stTextArea label {
    color: #1e2a38 !important;
    font-weight: bold;
}

/* Optional: Remove outline color on focus to match dark theme */
input:focus, select:focus {
    outline: none !important;
    box-shadow: 0 0 0 2px #4f46e5 !important; /* Optional focus ring */
}
</style>
""", unsafe_allow_html=True)

with st.sidebar:
    st.title("Deep Research Assistant")
    topic = st.text_input("Topic to research")
    report_type = st.selectbox("Type of report", [
        "Summary - Short and fast ",
        "Detailed Report ",
        "Thorough Academic Research "
    ])
    tone = st.selectbox("Tone of the report", [
        "Objective - Impartial and unbiased presentation of facts and findings",
        "Persuasive - Advocating a specific point of view",
        "Narrative - Storytelling tone for layperson readers"
    ])
    source_type = st.selectbox("Sources to include", ["Web Only", "Academic Only", "Hybrid"])
    custom_domains = st.text_input("Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")
    research_button = st.button("Research")

st.title("Research Output")

if research_button and topic:
    try:
        with st.status("Gathering data..."):
            st.info("Fetching from sources...")

        all_sources = []
        citations = []

        # Tag every entry with its origin so citations can be labelled later.
        if source_type in ["Web Only", "Hybrid"]:
            web_data = get_sources(topic, custom_domains)
            for item in web_data:
                all_sources.append(item | {"source": "web"})

        if source_type in ["Academic Only", "Hybrid"]:
            arxiv_data = get_arxiv_papers(topic)
            for item in arxiv_data:
                all_sources.append(item | {"source": "arxiv"})
            semantic_data = get_semantic_papers(topic)
            for item in semantic_data:
                all_sources.append(item | {"source": "semantic"})

        merged = merge_duplicates(all_sources)
        combined_text = ""
        for m in merged:
            # Web hits carry "snippet"; academic hits carry "summary".
            combined_text += f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}...\n\n"
            citations.append(generate_apa_citation(m['title'], m['url'], m['source']))

        with st.spinner("Synthesizing report..."):
            # BUG FIX: this previously compared against
            # "Summary - Short and fast )" (stray ')'), which can never equal
            # the selectbox option "Summary - Short and fast ", so the summary
            # branch was unreachable and summaries fell through to the
            # thorough-academic prompt.
            if report_type == "Summary - Short and fast ":
                # NOTE(review): none of these prompts actually interpolate
                # `combined_text`/`citations` even though they mention "the
                # following sources" — confirm whether the gathered sources
                # should be appended to the prompt text.
                prompt = f"""
You are a research assistant. Based on the following sources related to the topic **{topic}**, provide a concise overview.
Analyze and summarize based on the selected sources: {'Web Only' if source_type == 'Web Only' else 'Academic Only' if source_type == 'Academic Only' else 'Hybrid (Web + Academic)'}.
Use a clear and accessible tone suitable for readers who want a quick understanding.
"""
            elif report_type == "Detailed Report ":
                prompt = f"""
You are a research analyst tasked with writing a structured research brief on the topic **{topic}**.
Use the following sources ({'Web Only' if source_type == 'Web Only' else 'Academic Only' if source_type == 'Academic Only' else 'Hybrid'}) to:
1. Write an **Introduction/Abstract** giving context and importance of the topic.
2. Identify and explain the **Research Gap** present in the existing knowledge or implementations.
3. Propose a **Novel Insight or Contribution** that can address the research gap.
4. Include a section for **Citations in APA format** using the sources provided.
"""
            else:  # Thorough Academic Research
                prompt = f"""
You are an expert researcher writing a full academic paper on the topic **{topic}** using sources from {'Web Only' if source_type == 'Web Only' else 'Academic Only' if source_type == 'Academic Only' else 'Hybrid'}.

The paper should include the following sections:
1. **Introduction**: Provide context, background, and purpose of the paper.
2. **Research Gap**: Identify current gaps or underexplored areas in this field.
3. **Novelty/Contribution**: Describe the new idea, method, or perspective this paper offers.
4. **Methodology**: Outline methods, models, or frameworks that can be applied to achieve this novelty.
5. **Comparative Analysis** *(if applicable)*: Compare existing models/methods with the proposed one.
6. **Future Directions**: Suggest further exploration paths or follow-up research.
7. **Citations**: Include in-text references and a citation section in **APA format** only.
"""

            final_output = call_llm([{"role": "user", "content": prompt}])

        st.markdown(f" {report_type}")
        st.markdown(final_output, unsafe_allow_html=True)

        # Downloads are only produced for the full academic report.
        if report_type == "Thorough Academic Research ":
            with st.spinner("Preparing PDF and LaTeX..."):
                pdf_file = generate_pdf(final_output)
                latex_file = generate_latex(final_output)
                st.markdown(generate_download_button(pdf_file, "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
                st.markdown(generate_download_button(latex_file, "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)

        overlaps = check_plagiarism(final_output, topic)
        if overlaps:
            st.warning("Potential overlaps detected:")
            for hit in overlaps:
                st.markdown(f"- [{hit['title']}]({hit['url']})")
        else:
            st.success("No major overlaps found.")

    except Exception as e:
        st.error(f"Error: {e}")
 
5
  from dotenv import load_dotenv
6
  from tavily import TavilyClient
7
  import feedparser
 
8
  from fuzzywuzzy import fuzz
9
  from PIL import Image
10
  from io import BytesIO
 
43
  domain_filters = [d.strip() for d in domains.split(",") if d.strip()]
44
  query += " site:" + " OR site:".join(domain_filters)
45
  response = tavily.search(query=query, search_depth="advanced", max_results=10)
46
+ return [{
47
+ "title": r["title"],
48
+ "url": r["url"],
49
+ "snippet": r.get("content", ""),
50
+ "source": "web"
51
+ } for r in response.get("results", [])]
 
 
52
 
53
  def get_arxiv_papers(query):
54
  from urllib.parse import quote_plus
 
57
  return [{
58
  "title": e.title,
59
  "summary": e.summary.replace("\n", " ").strip(),
60
+ "url": next((l.href for l in e.links if l.type == "application/pdf"), ""),
61
+ "source": "arxiv"
62
  } for e in feed.entries]
63
 
64
def get_semantic_papers(query):
    """Search Semantic Scholar for up to 5 papers matching *query*.

    Best-effort: returns a list of dicts tagged ``"source": "semantic"``,
    or an empty list if the API call fails for any expected reason.
    """
    try:
        url = "https://api.semanticscholar.org/graph/v1/paper/search"
        params = {"query": query, "limit": 5, "fields": "title,abstract,url"}
        # FIX: requests has no default timeout; without one a stalled API
        # call hangs the whole Streamlit app indefinitely.
        response = requests.get(url, params=params, timeout=10)
        papers = response.json().get("data", [])
        return [{
            "title": p.get("title"),
            # FIX: the API can return "abstract": null; dict.get's default
            # only applies when the key is absent, so use `or` instead.
            "summary": p.get("abstract") or "No abstract available",
            "url": p.get("url"),
            "source": "semantic"
        } for p in papers]
    except (requests.RequestException, ValueError):
        # FIX: narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit. ValueError covers non-JSON bodies.
        return []
78
 
79
  def check_plagiarism(text, topic):
80
  hits = []
 
86
 
87
def generate_apa_citation(title, url, source):
    """Build a lightweight APA-like citation line for a merged source entry."""
    if source == "arxiv":
        label = "*arXiv*"
    elif source == "semantic":
        label = "*Semantic Scholar*"
    elif source == "web":
        label = "*Web Source*"
    else:
        # Anything unrecognised is cited as a generic web source.
        label = "*Web*"
    return "{}. ({}). {}. {}".format(title, datetime.datetime.now().year, label, url)
91
 
92
  def merge_duplicates(entries):
 
126
  """
127
 
128
# --- Streamlit UI ---
st.set_page_config("Deep Research Bot", layout="centered")

# Slate theme with amber headings; the .css-* selectors target generated
# Streamlit widget classes.
st.markdown("""
<style>
.stApp { background-color: #0f172a; color: white; }
h1, h2, h3 { color: #facc15; }
.css-1d391kg, .css-1kyxreq, .css-q8sbsg { background-color: #1e293b; color: white; border-radius: 10px; padding: 10px; }
</style>
""", unsafe_allow_html=True)

# Sidebar collects every research parameter before the run is triggered.
with st.sidebar:
    st.title("🧠 Deep Research Assistant")
    topic = st.text_input("πŸ” Enter your research topic")
    report_type = st.selectbox("πŸ“„ Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
    tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
    source_type = st.selectbox("πŸ“š Sources", ["Web Only", "Academic Only", "Hybrid"])
    custom_domains = st.text_input("🌐 Optional Web Domains", placeholder="example.com, techcrunch.com")
    research_button = st.button("πŸ”Ž Run Deep Research")

st.title("πŸ“˜ Research Output")

if research_button and topic:
    try:
        # NOTE(review): nesting below the spinner is reconstructed from a
        # whitespace-mangled diff — confirm against the original file.
        with st.spinner("Gathering sources and analyzing deeply..."):
            # The fetchers already tag each entry with its "source" key.
            gathered = []
            if source_type in ["Web Only", "Hybrid"]:
                gathered += get_sources(topic, custom_domains)
            if source_type in ["Academic Only", "Hybrid"]:
                gathered += get_arxiv_papers(topic)
                gathered += get_semantic_papers(topic)

            deduped = merge_duplicates(gathered)
            apa_citations = [generate_apa_citation(entry['title'], entry['url'], entry['source']) for entry in deduped]
            # Web hits carry "snippet"; academic hits carry "summary".
            source_digest = "\n\n".join([f"- [{entry['title']}]({entry['url']})\n> {entry.get('snippet', entry.get('summary', ''))[:300]}..." for entry in deduped])

            prompt = f"""
You are an expert assistant. Based on the following sources, write a {report_type.lower()} in a {tone.lower()} tone on the topic: {topic}.

Sources:
{source_digest}

APA Citations:
{chr(10).join(apa_citations)}
"""

            st.subheader(f"πŸ“ {report_type} on '{topic}'")
            final_output = call_llm([{"role": "user", "content": prompt}])
            st.markdown(final_output, unsafe_allow_html=True)

            # Downloads are produced only for the full academic report.
            if report_type == "Thorough Academic Research":
                st.markdown("---")
                st.subheader("πŸ“„ Downloads")
                st.markdown(generate_download_button(generate_pdf(final_output), "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
                st.markdown(generate_download_button(generate_latex(final_output), "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)

            overlaps = check_plagiarism(final_output, topic)
            st.markdown("---")
            st.subheader("πŸ”Ž Plagiarism Check")
            if overlaps:
                st.warning("Potential overlaps detected:")
                for hit in overlaps:
                    st.markdown(f"- [{hit['title']}]({hit['url']})")
            else:
                st.success("βœ… No major overlaps found.")

    except Exception as err:
        st.error(f"❌ Error occurred: {err}")