Update app.py
app.py
CHANGED
@@ -5,7 +5,6 @@ import datetime
 from dotenv import load_dotenv
 from tavily import TavilyClient
 import feedparser
-import time
 from fuzzywuzzy import fuzz
 from PIL import Image
 from io import BytesIO
@@ -44,14 +43,12 @@ def get_sources(topic, domains=None):
         domain_filters = [d.strip() for d in domains.split(",") if d.strip()]
         query += " site:" + " OR site:".join(domain_filters)
     response = tavily.search(query=query, search_depth="advanced", max_results=10)
-
-
-
-
-
-
-    })
-    return sources
+    return [{
+        "title": r["title"],
+        "url": r["url"],
+        "snippet": r.get("content", ""),
+        "source": "web"
+    } for r in response.get("results", [])]

 def get_arxiv_papers(query):
     from urllib.parse import quote_plus
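The reworked `get_sources` now returns the source dicts straight from the Tavily response instead of building a separate `sources` list. A minimal standalone sketch of the same call path, not part of the commit, assuming `TAVILY_API_KEY` is set in the environment (app.py loads it via `load_dotenv()`):

```python
# Standalone sketch of the reshaped get_sources() result (illustration only).
# Assumes TAVILY_API_KEY is set; app.py loads it with load_dotenv().
import os
from tavily import TavilyClient

tavily = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
response = tavily.search(query="small language models site:arxiv.org",
                         search_depth="advanced", max_results=10)
sources = [{"title": r["title"], "url": r["url"],
            "snippet": r.get("content", ""), "source": "web"}
           for r in response.get("results", [])]
for s in sources:
    print(f"[{s['source']}] {s['title']} -> {s['url']}")
```

Tagging each dict with `"source": "web"` here is what lets the later citation step pick the right label without the old per-item append loop.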
@@ -60,19 +57,24 @@ def get_arxiv_papers(query):
     return [{
         "title": e.title,
         "summary": e.summary.replace("\n", " ").strip(),
-        "url": next((l.href for l in e.links if l.type == "application/pdf"), "")
+        "url": next((l.href for l in e.links if l.type == "application/pdf"), ""),
+        "source": "arxiv"
     } for e in feed.entries]

 def get_semantic_papers(query):
-
-
-
-
-
-
-
-
-
+    try:
+        url = "https://api.semanticscholar.org/graph/v1/paper/search"
+        params = {"query": query, "limit": 5, "fields": "title,abstract,url"}
+        response = requests.get(url, params=params)
+        papers = response.json().get("data", [])
+        return [{
+            "title": p.get("title"),
+            "summary": p.get("abstract", "No abstract available"),
+            "url": p.get("url"),
+            "source": "semantic"
+        } for p in papers]
+    except:
+        return []

 def check_plagiarism(text, topic):
     hits = []
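The new `get_semantic_papers` calls the Semantic Scholar Graph API paper-search endpoint through `requests`, which is presumably imported near the top of app.py outside this diff. A minimal sketch of the same request, with a narrower error path than the bare `except` above:

```python
# Standalone sketch of the Semantic Scholar lookup added above (illustration only).
import requests

def search_semantic_scholar(query, limit=5):
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": limit, "fields": "title,abstract,url"}
    try:
        resp = requests.get(url, params=params, timeout=10)
        resp.raise_for_status()
    except requests.RequestException:
        return []  # mirror the app's fallback of returning an empty list
    return resp.json().get("data", [])

if __name__ == "__main__":
    for paper in search_semantic_scholar("retrieval augmented generation"):
        print(paper.get("title"), "->", paper.get("url"))
```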
@@ -84,9 +86,7 @@ def check_plagiarism(text, topic):

 def generate_apa_citation(title, url, source):
     year = datetime.datetime.now().year
-    label = {
-        "arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"
-    }.get(source, "*Web*")
+    label = {"arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"}.get(source, "*Web*")
     return f"{title}. ({year}). {label}. {url}"

 def merge_duplicates(entries):
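This hunk ends at `merge_duplicates(entries)`, whose body is unchanged and not shown in the diff. Since app.py imports `fuzz` from fuzzywuzzy, one plausible, purely hypothetical shape for that helper is a fuzzy-title dedup:

```python
# Hypothetical sketch of merge_duplicates(); the real implementation in app.py
# is not part of this diff. Assumes fuzzywuzzy's token_set_ratio on titles.
from fuzzywuzzy import fuzz

def merge_duplicates(entries, threshold=90):
    merged = []
    for entry in entries:
        title = (entry.get("title") or "").lower()
        if any(fuzz.token_set_ratio(title, (kept.get("title") or "").lower()) >= threshold
               for kept in merged):
            continue  # drop entries whose titles closely match one already kept
        merged.append(entry)
    return merged
```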
@@ -126,143 +126,69 @@ def generate_download_button(file, label, mime_type):
     """

 # --- Streamlit UI ---
-st.set_page_config("Deep Research Bot", layout="
+st.set_page_config("Deep Research Bot", layout="centered")
 st.markdown("""
 <style>
-
-
-
-}
-
-.stApp {
-    background-color: #1e2a38;
-    color: #ffffff;
-}
-
-/* Text fields, dropdowns, and inputs */
-.stTextInput > div > div > input,
-.stSelectbox > div > div > div > div {
-    background-color: #ffffff;
-    color: #1e2a38;
-}
-
-/* Fix labels in sidebar to show on dark background */
-.stSidebar label,
-.stTextInput label,
-.stSelectbox label,
-.stTextArea label {
-    color: #1e2a38 !important;
-    font-weight: bold;
-}
-
-/* Optional: Remove outline color on focus to match dark theme */
-input:focus, select:focus {
-    outline: none !important;
-    box-shadow: 0 0 0 2px #4f46e5 !important; /* Optional focus ring */
-}
+.stApp { background-color: #0f172a; color: white; }
+h1, h2, h3 { color: #facc15; }
+.css-1d391kg, .css-1kyxreq, .css-q8sbsg { background-color: #1e293b; color: white; border-radius: 10px; padding: 10px; }
 </style>
 """, unsafe_allow_html=True)

 with st.sidebar:
-    st.title("Deep Research Assistant")
-    topic = st.text_input("
-    report_type = st.selectbox("Type
-
-
-
-
-    tone = st.selectbox("Tone of the report", [
-        "Objective - Impartial and unbiased presentation of facts and findings",
-        "Persuasive - Advocating a specific point of view",
-        "Narrative - Storytelling tone for layperson readers"
-    ])
-    source_type = st.selectbox("Sources to include", ["Web Only", "Academic Only", "Hybrid"])
-    custom_domains = st.text_input("Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")
-    research_button = st.button("Research")
+    st.title("Deep Research Assistant")
+    topic = st.text_input("Enter your research topic")
+    report_type = st.selectbox("Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
+    tone = st.selectbox("Tone", ["Objective", "Persuasive", "Narrative"])
+    source_type = st.selectbox("Sources", ["Web Only", "Academic Only", "Hybrid"])
+    custom_domains = st.text_input("Optional Web Domains", placeholder="example.com, techcrunch.com")
+    research_button = st.button("Run Deep Research")

 st.title("Research Output")

 if research_button and topic:
     try:
-        with st.
-            st.info("Fetching from sources...")
-
+        with st.spinner("Gathering sources and analyzing deeply..."):
             all_sources = []
-            citations = []
-
             if source_type in ["Web Only", "Hybrid"]:
-
-                for item in web_data:
-                    all_sources.append(item | {"source": "web"})
-
+                all_sources += get_sources(topic, custom_domains)
             if source_type in ["Academic Only", "Hybrid"]:
-
-
-                    all_sources.append(item | {"source": "arxiv"})
-                semantic_data = get_semantic_papers(topic)
-                for item in semantic_data:
-                    all_sources.append(item | {"source": "semantic"})
+                all_sources += get_arxiv_papers(topic)
+                all_sources += get_semantic_papers(topic)

             merged = merge_duplicates(all_sources)
-
-            for m in merged
-                combined_text += f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}...\n\n"
-                citations.append(generate_apa_citation(m['title'], m['url'], m['source']))
-
-        with st.spinner("Synthesizing report..."):
-            if report_type == "Summary - Short and fast )":
-                prompt = f"""
-You are a research assistant. Based on the following sources related to the topic **{topic}**, provide a concise overview.
-Analyze and summarize based on the selected sources: {'Web Only' if source_type == 'Web Only' else 'Academic Only' if source_type == 'Academic Only' else 'Hybrid (Web + Academic)'}.
-Use a clear and accessible tone suitable for readers who want a quick understanding.
-
-"""
-
-            elif report_type == "Detailed Report ":
-                prompt = f"""
-You are a research analyst tasked with writing a structured research brief on the topic **{topic}**.
-Use the following sources ({'Web Only' if source_type == 'Web Only' else 'Academic Only' if source_type == 'Academic Only' else 'Hybrid'}) to:
-1. Write an **Introduction/Abstract** giving context and importance of the topic.
-2. Identify and explain the **Research Gap** present in the existing knowledge or implementations.
-3. Propose a **Novel Insight or Contribution** that can address the research gap.
-4. Include a section for **Citations in APA format** using the sources provided.
-
-"""
-
-            else: # Thorough Academic Research
-                prompt = f"""
-You are an expert researcher writing a full academic paper on the topic **{topic}** using sources from {'Web Only' if source_type == 'Web Only' else 'Academic Only' if source_type == 'Academic Only' else 'Hybrid'}.
-
-
-2. **Research Gap**: Identify current gaps or underexplored areas in this field.
-3. **Novelty/Contribution**: Describe the new idea, method, or perspective this paper offers.
-4. **Methodology**: Outline methods, models, or frameworks that can be applied to achieve this novelty.
-5. **Comparative Analysis** *(if applicable)*: Compare existing models/methods with the proposed one.
-6. **Future Directions**: Suggest further exploration paths or follow-up research.
-7. **Citations**: Include in-text references and a citation section in **APA format** only.
-
-
-        st.
+            citations = [generate_apa_citation(m['title'], m['url'], m['source']) for m in merged]
+            combined_text = "\n\n".join([f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}..." for m in merged])
+
+            prompt = f"""
+You are an expert assistant. Based on the following sources, write a {report_type.lower()} in a {tone.lower()} tone on the topic: {topic}.
+
+Sources:
+{combined_text}
+
+APA Citations:
+{chr(10).join(citations)}
+"""
+
+        st.subheader(f"{report_type} on '{topic}'")
+        final_output = call_llm([{"role": "user", "content": prompt}])
         st.markdown(final_output, unsafe_allow_html=True)

-        if report_type == "Thorough Academic Research
-
-
-
-
-            st.markdown(generate_download_button(latex_file, "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)
+        if report_type == "Thorough Academic Research":
+            st.markdown("---")
+            st.subheader("Downloads")
+            st.markdown(generate_download_button(generate_pdf(final_output), "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
+            st.markdown(generate_download_button(generate_latex(final_output), "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)

         overlaps = check_plagiarism(final_output, topic)
+        st.markdown("---")
+        st.subheader("Plagiarism Check")
         if overlaps:
             st.warning("Potential overlaps detected:")
             for hit in overlaps:
                 st.markdown(f"- [{hit['title']}]({hit['url']})")
         else:
             st.success("No major overlaps found.")

     except Exception as e:
-        st.error(f"Error: {e}")
+        st.error(f"Error occurred: {e}")
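The rewritten UI flow leans on helpers defined elsewhere in app.py and untouched by this commit: `call_llm`, `generate_pdf`, `generate_latex`, and `generate_download_button`. As a rough sketch of how the `call_llm` step could be wired, assuming an OpenAI-compatible client and an API key loaded by `load_dotenv()` (the actual provider and model are not shown in the diff):

```python
# Hypothetical sketch of call_llm(); the real helper lives elsewhere in app.py.
# Provider, model name, and env var are assumptions, not taken from this commit.
import os
from openai import OpenAI

def call_llm(messages, model="gpt-4o-mini"):
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    response = client.chat.completions.create(model=model, messages=messages)
    return response.choices[0].message.content
```

Since app.py builds `messages` as a single user turn (`[{"role": "user", "content": prompt}]`), any chat-completions-style backend drops into this shape the same way.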