Spaces:

Ani14
/

AutoReasearcher

Sleeping

App Files Community

Ani14 committed on Apr 21, 2025

Commit

6407974

verified ·

1 Parent(s): 91282b4

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -80

app.py CHANGED Viewed

@@ -53,10 +53,6 @@ def get_sources(topic, domains=None):
         })
     return sources
-def get_images(topic):
-    response = tavily.image_search(query=topic, max_results=5)
-    return response.get("images", [])
 def get_arxiv_papers(query):
     from urllib.parse import quote_plus
     url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results=5"
@@ -103,18 +99,18 @@ def merge_duplicates(entries):
     return unique
 def generate_pdf(text):
-    text = remove_invalid_unicode(text)
     pdf = FPDF()
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.set_font("Arial", size=12)
     for line in text.split('\n'):
         pdf.multi_cell(0, 10, line)
-    pdf_bytes = pdf.output(dest='S').encode('latin1')
-    return BytesIO(pdf_bytes)
 def generate_latex(text):
-    text = remove_invalid_unicode(text)
     latex = "\\documentclass{article}\n\\usepackage{hyperref}\n\\begin{document}\n"
     for line in text.split('\n'):
         latex += line.replace('_', '\\_') + "\\\\\n"
@@ -124,114 +120,121 @@ def generate_latex(text):
 def generate_download_button(file, label, mime_type):
     b64 = base64.b64encode(file.read()).decode()
     return f"""
-        <a href=\"data:{mime_type};base64,{b64}\" download=\"{label}\">📥 Download {label}</a>
     """
 # --- Streamlit UI ---
 st.set_page_config("Deep Research Bot", layout="wide")
 with st.sidebar:
-    st.title("🧠 Deep Research Assistant")
-    topic = st.text_input("💡 Topic to research")
-    report_type = st.selectbox("📄 Type of report", [
         "Summary - Short and fast (~2 min)",
         "Detailed Report (~5 min)",
         "Thorough Academic Research (~10 min)"
     ])
-    tone = st.selectbox("🎯 Tone of the report", [
         "Objective - Impartial and unbiased presentation of facts and findings",
         "Persuasive - Advocating a specific point of view",
         "Narrative - Storytelling tone for layperson readers"
     ])
-    source_type = st.selectbox("🌐 Sources to include", ["Web Only", "Academic Only", "Hybrid"])
-    custom_domains = st.text_input("🔍 Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")
     research_button = st.button("Research")
-st.title("📑 Research Output")
 if research_button and topic:
     try:
-        with st.status("🔍 Gathering data..."):
             st.info("Fetching from sources...")
-            images = get_images(topic)
             if report_type == "Summary - Short and fast (~2 min)":
                 prompt = f"""
-# Research Summary
-Topic: {topic}
 Tone: {tone}
-Type: Summary Only
-Write a clear and concise summary overview of the topic. No detailed sections. Academic tone, short and informative.
                 """
-                final_output = call_llm([{"role": "user", "content": prompt}])
-                st.markdown(f"### 📄 {report_type}")
-                st.markdown(final_output)
-                if images:
-                    st.markdown("### 🖼️ Related Images")
-                    for img_url in images:
-                        st.image(img_url, width=300)
             else:
-                all_sources = []
-                citations = []
-                if source_type in ["Web Only", "Hybrid"]:
-                    web_data = get_sources(topic, custom_domains)
-                    for item in web_data:
-                        all_sources.append(item | {"source": "web"})
-                if source_type in ["Academic Only", "Hybrid"]:
-                    arxiv_data = get_arxiv_papers(topic)
-                    for item in arxiv_data:
-                        all_sources.append(item | {"source": "arxiv"})
-                    semantic_data = get_semantic_papers(topic)
-                    for item in semantic_data:
-                        all_sources.append(item | {"source": "semantic"})
-                merged = merge_duplicates(all_sources)
-                combined_text = ""
-                for m in merged:
-                    combined_text += f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}...\n\n"
-                    citations.append(generate_apa_citation(m['title'], m['url'], m['source']))
                 prompt = f"""
 # Research Topic: {topic}
 Tone: {tone}
 Type: {report_type}
 Sources:
 {combined_text}
-Write the report in academic markdown with paragraphs (use bullet points only when necessary). Include:
 1. Introduction
 2. Research Gap
 3. Novel Insight
 4. Application
-5. Full Academic Writeup if Thorough Report
                 """
-                final_output = call_llm([{"role": "user", "content": prompt}])
-                st.markdown(f"### 📄 {report_type}")
-                st.markdown(final_output, unsafe_allow_html=True)
-                st.markdown("### 📚 Citations (APA Format)")
-                for cite in citations:
-                    st.markdown(f"- {cite}")
-                if report_type == "Thorough Academic Research (~10 min)":
-                    with st.spinner("📦 Preparing PDF and LaTeX..."):
-                        pdf_file = generate_pdf(final_output)
-                        latex_file = generate_latex(final_output)
-                        st.markdown(generate_download_button(pdf_file, "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
-                        st.markdown(generate_download_button(latex_file, "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)
-                overlaps = check_plagiarism(final_output, topic)
-                if overlaps:
-                    st.warning("⚠️ Potential overlaps detected:")
-                    for hit in overlaps:
-                        st.markdown(f"- [{hit['title']}]({hit['url']})")
-                else:
-                    st.success("✅ No major overlaps found.")
     except Exception as e:
-        st.error(f"Error: {e}")

         })
     return sources
 def get_arxiv_papers(query):
     from urllib.parse import quote_plus
     url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results=5"
     return unique
 def generate_pdf(text):
     pdf = FPDF()
     pdf.add_page()
     pdf.set_auto_page_break(auto=True, margin=15)
     pdf.set_font("Arial", size=12)
     for line in text.split('\n'):
         pdf.multi_cell(0, 10, line)
+    pdf_output = BytesIO()
+    pdf.output(pdf_output)
+    pdf_output.seek(0)
+    return pdf_output
 def generate_latex(text):
     latex = "\\documentclass{article}\n\\usepackage{hyperref}\n\\begin{document}\n"
     for line in text.split('\n'):
         latex += line.replace('_', '\\_') + "\\\\\n"
 def generate_download_button(file, label, mime_type):
     b64 = base64.b64encode(file.read()).decode()
     return f"""
+        <a href=\"data:{mime_type};base64,{b64}\" download=\"{label}\">
+            📥 Download {label}
+        </a>
     """
 # --- Streamlit UI ---
 st.set_page_config("Deep Research Bot", layout="wide")
 with st.sidebar:
+    st.title("Deep Research Assistant")
+    topic = st.text_input("Topic to research")
+    report_type = st.selectbox("Type of report", [
         "Summary - Short and fast (~2 min)",
         "Detailed Report (~5 min)",
         "Thorough Academic Research (~10 min)"
     ])
+    tone = st.selectbox("Tone of the report", [
         "Objective - Impartial and unbiased presentation of facts and findings",
         "Persuasive - Advocating a specific point of view",
         "Narrative - Storytelling tone for layperson readers"
     ])
+    source_type = st.selectbox("Sources to include", ["Web Only", "Academic Only", "Hybrid"])
+    custom_domains = st.text_input("Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")
     research_button = st.button("Research")
+st.title("Research Output")
 if research_button and topic:
     try:
+        with st.status("Gathering data..."):
             st.info("Fetching from sources...")
+            all_sources = []
+            citations = []
+            if source_type in ["Web Only", "Hybrid"]:
+                web_data = get_sources(topic, custom_domains)
+                for item in web_data:
+                    all_sources.append(item | {"source": "web"})
+            if source_type in ["Academic Only", "Hybrid"]:
+                arxiv_data = get_arxiv_papers(topic)
+                for item in arxiv_data:
+                    all_sources.append(item | {"source": "arxiv"})
+                semantic_data = get_semantic_papers(topic)
+                for item in semantic_data:
+                    all_sources.append(item | {"source": "semantic"})
+            merged = merge_duplicates(all_sources)
+            combined_text = ""
+            for m in merged:
+                combined_text += f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}...\n\n"
+                citations.append(generate_apa_citation(m['title'], m['url'], m['source']))
+        with st.spinner("Synthesizing report..."):
             if report_type == "Summary - Short and fast (~2 min)":
                 prompt = f"""
+# Topic Overview: {topic}
 Tone: {tone}
+Provide a concise and informative summary or overview of the topic based on the available sources.
+Sources:
+{combined_text}
+                """
+            elif report_type == "Detailed Report (~5 min)":
+                prompt = f"""
+# Research Topic: {topic}
+Tone: {tone}
+Type: {report_type}
+Sources:
+{combined_text}
+Write a detailed research brief including:
+1. Introduction
+2. Research Gap
+3. Novel Insight
+4. Application
+5. Citations
                 """
             else:
                 prompt = f"""
 # Research Topic: {topic}
 Tone: {tone}
 Type: {report_type}
 Sources:
 {combined_text}
+Write a comprehensive academic paper including:
 1. Introduction
 2. Research Gap
 3. Novel Insight
 4. Application
+5. Full Academic Writeup
+6. Citations in APA format
                 """
+            final_output = call_llm([{"role": "user", "content": prompt}])
+        st.markdown(f"### {report_type}")
+        st.markdown(final_output, unsafe_allow_html=True)
+        st.markdown("### Citations (APA Format)")
+        for cite in citations:
+            st.markdown(f"- {cite}")
+        if report_type == "Thorough Academic Research (~10 min)":
+            with st.spinner("Preparing PDF and LaTeX..."):
+                pdf_file = generate_pdf(final_output)
+                latex_file = generate_latex(final_output)
+                st.markdown(generate_download_button(pdf_file, "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
+                st.markdown(generate_download_button(latex_file, "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)
+        overlaps = check_plagiarism(final_output, topic)
+        if overlaps:
+            st.warning("Potential overlaps detected:")
+            for hit in overlaps:
+                st.markdown(f"- [{hit['title']}]({hit['url']})")
+        else:
+            st.success("No major overlaps found.")
     except Exception as e:
+        st.error(f"Error: {e}")