Update app.py
app.py CHANGED
@@ -2,24 +2,47 @@ import os
 import streamlit as st
 import requests
 import datetime
-from dotenv import load_dotenv
-from tavily import TavilyClient
 import feedparser
 import time
+from dotenv import load_dotenv
+from tavily import TavilyClient
 from fuzzywuzzy import fuzz
+from urllib.parse import quote_plus
 from PIL import Image
 from io import BytesIO
 from fpdf import FPDF
-import base64
 
-# Load
+# --- Load Keys ---
 load_dotenv()
 OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
-TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
+TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "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")
 tavily = TavilyClient(api_key=TAVILY_API_KEY)
 
+# --- Layout ---
+st.set_page_config("Deep Research Bot", layout="wide")
+with st.sidebar:
+    st.title("🧭 Research Input")
+    topic = st.text_input("💡 What would you like me to research next?")
+    report_type = st.selectbox("📄 Type of report", [
+        "Summary - Short and fast (~2 min)",
+        "Detailed Report (~5 min)",
+        "Thorough Academic Research (~10 min)"
+    ])
+    tone = st.selectbox("🎯 Tone of the report", [
+        "Objective - Impartial and unbiased presentation of facts and findings",
+        "Persuasive - Advocating a specific point of view",
+        "Narrative - Storytelling tone for layperson readers"
+    ])
+    source_type = st.selectbox("🌐 Sources to include", [
+        "Web Only", "Academic Only", "Hybrid"
+    ])
+    custom_domains = st.text_input("🔍 Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")
+
+st.title("🤖 Real-time Deep Research Agent (Tavily Edition)")
+st.markdown("This powerful assistant autonomously gathers, analyzes, and synthesizes research from multiple sources in real-time using Tavily, ArXiv, and Semantic Scholar.")
+
 # --- Helper Functions ---
-def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=
+def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
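The new call_llm signature is fully visible above, but the function body is cut off after the Authorization header in both versions of this diff. For orientation, a minimal completion against OpenRouter's OpenAI-compatible chat-completions endpoint could look like the sketch below; the payload keys and response parsing follow standard OpenRouter conventions and are not lines recovered from this commit.

    # Hypothetical completion of call_llm -- not the commit's actual body.
    import os
    import requests

    OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

    def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free",
                 max_tokens=2048, temperature=0.7):
        url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
        }
        resp = requests.post(url, headers=headers, json=payload, timeout=120)
        resp.raise_for_status()
        # Standard OpenAI-style response shape
        return resp.json()["choices"][0]["message"]["content"]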
@@ -43,6 +66,7 @@ def get_sources(topic, domains=None):
     if domains:
         domain_filters = [d.strip() for d in domains.split(",") if d.strip()]
         query += " site:" + " OR site:".join(domain_filters)
+
     response = tavily.search(query=query, search_depth="advanced", max_results=10)
     sources = []
     for item in response.get("results", []):
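This hunk only adds a blank line, but it shows how the app scopes searches: site: operators appended to the Tavily query string. The tavily-python client also accepts a dedicated include_domains filter; the sketch below assumes that parameter and an illustrative query, since the start of get_sources is outside the diff.

    # Sketch: domain scoping via the SDK filter instead of "site:" syntax.
    # include_domains is assumed from tavily-python; verify against your version.
    from tavily import TavilyClient

    tavily = TavilyClient(api_key="tvly-...")  # placeholder key

    def get_sources(topic, domains=None):
        kwargs = {"search_depth": "advanced", "max_results": 10}
        if domains:
            kwargs["include_domains"] = [d.strip() for d in domains.split(",") if d.strip()]
        response = tavily.search(query=topic, **kwargs)
        # Map Tavily's "content" field onto the "snippet" key the app expects.
        return [{"title": r.get("title"), "url": r.get("url"),
                 "snippet": r.get("content", "")}
                for r in response.get("results", [])]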
@@ -54,7 +78,6 @@ def get_sources(topic, domains=None):
     return sources
 
 def get_arxiv_papers(query):
-    from urllib.parse import quote_plus
     url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results=5"
     feed = feedparser.parse(url)
     return [{
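The field mapping returned by get_arxiv_papers is elided after return [{. feedparser exposes arXiv's Atom entries as objects with title, summary, and link attributes, so the elided dict plausibly looks like the sketch below; the exact keys are an assumption.

    # Assumed completion of the elided arXiv field mapping.
    import feedparser
    from urllib.parse import quote_plus

    def get_arxiv_papers(query):
        url = ("http://export.arxiv.org/api/query"
               f"?search_query=all:{quote_plus(query)}&start=0&max_results=5")
        feed = feedparser.parse(url)
        return [{
            "title": entry.title,
            "summary": entry.summary,
            "url": entry.link,
        } for entry in feed.entries]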
@@ -70,147 +93,4 @@ def get_semantic_papers(query):
     papers = response.json().get("data", [])
     return [{
         "title": p.get("title"),
-        "summary":
-        "url": p.get("url")
-    } for p in papers]
-
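The request inside get_semantic_papers is outside this diff, and the "summary" line above is truncated in both the old and new versions. Semantic Scholar's Graph API paper-search endpoint returns results under a data key, which matches the parsing shown; a plausible reconstruction, with the requested fields as assumptions:

    # Sketch of get_semantic_papers; the field names are assumptions.
    import requests

    def get_semantic_papers(query):
        response = requests.get(
            "https://api.semanticscholar.org/graph/v1/paper/search",
            params={"query": query, "limit": 5, "fields": "title,abstract,url"},
            timeout=30,
        )
        papers = response.json().get("data", [])
        return [{
            "title": p.get("title"),
            "summary": p.get("abstract"),  # assumed value of the truncated "summary" line
            "url": p.get("url"),
        } for p in papers]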
-def check_plagiarism(text, topic):
-    hits = []
-    for r in get_sources(topic):
-        similarity = fuzz.token_set_ratio(text, r["snippet"])
-        if similarity >= 75:
-            hits.append(r)
-    return hits
-
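check_plagiarism flags any fetched snippet whose token_set_ratio against the report is at least 75. token_set_ratio ignores word order and duplication, so reworded but substantially overlapping text still scores high:

    from fuzzywuzzy import fuzz

    a = "Large language models can synthesize research reports automatically"
    b = "Research reports can be synthesized automatically by large language models"
    print(fuzz.token_set_ratio(a, b))  # near 100 despite the reordering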
-def generate_apa_citation(title, url, source):
-    year = datetime.datetime.now().year
-    label = {
-        "arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"
-    }.get(source, "*Web*")
-    return f"{title}. ({year}). {label}. {url}"
-
-def merge_duplicates(entries):
-    unique = []
-    seen_titles = []
-    for entry in entries:
-        if all(fuzz.token_set_ratio(entry['title'], seen) < 90 for seen in seen_titles):
-            unique.append(entry)
-            seen_titles.append(entry['title'])
-    return unique
-
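merge_duplicates keeps an entry only if its title scores below 90 against every previously kept title, collapsing near-duplicate sources that arrive from both web and academic channels. A runnable illustration (function restated from above; the example entries and URLs are hypothetical):

    from fuzzywuzzy import fuzz

    def merge_duplicates(entries):
        unique, seen_titles = [], []
        for entry in entries:
            if all(fuzz.token_set_ratio(entry['title'], seen) < 90 for seen in seen_titles):
                unique.append(entry)
                seen_titles.append(entry['title'])
        return unique

    entries = [
        {"title": "Deep Research Agents: A Survey", "url": "https://example.org/a"},
        {"title": "A Survey of Deep Research Agents", "url": "https://example.org/b"},
    ]
    print([e["title"] for e in merge_duplicates(entries)])  # one entry survives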
-def generate_pdf(text):
-    pdf = FPDF()
-    pdf.add_page()
-    pdf.set_auto_page_break(auto=True, margin=15)
-    pdf.set_font("Arial", size=12)
-    for line in text.split('\n'):
-        pdf.multi_cell(0, 10, line)
-    pdf_output = BytesIO()
-    pdf.output(pdf_output)
-    pdf_output.seek(0)
-    return pdf_output
-
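Note that generate_pdf passes a BytesIO straight to pdf.output(), which only works on some fpdf releases, and the core fonts are latin-1 only, so emoji-heavy model output can raise encoding errors. A sketch pinned to fpdf2 semantics, where output() with no arguments returns the document as a bytearray:

    # Sketch assuming the fpdf2 package, not the commit's code.
    from io import BytesIO
    from fpdf import FPDF

    def generate_pdf(text):
        pdf = FPDF()
        pdf.add_page()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.set_font("Helvetica", size=12)
        for line in text.split("\n"):
            # Core fonts are latin-1 only; drop unencodable characters.
            pdf.multi_cell(0, 10, line.encode("latin-1", "ignore").decode("latin-1"))
        return BytesIO(bytes(pdf.output()))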
-def generate_latex(text):
-    latex = "\\documentclass{article}\n\\usepackage{hyperref}\n\\begin{document}\n"
-    for line in text.split('\n'):
-        latex += line.replace('_', '\\_') + "\\\\\n"
-    latex += "\\end{document}"
-    return BytesIO(latex.encode("utf-8"))
-
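generate_latex escapes only underscores, but LaTeX also treats %, &, #, $, and braces specially, so raw model output can break compilation. A slightly safer sketch with a character map (an extension for illustration, not the commit's code):

    from io import BytesIO

    LATEX_SPECIALS = {"_": r"\_", "%": r"\%", "&": r"\&", "#": r"\#",
                      "$": r"\$", "{": r"\{", "}": r"\}"}

    def escape_latex(line):
        return "".join(LATEX_SPECIALS.get(ch, ch) for ch in line)

    def generate_latex(text):
        latex = "\\documentclass{article}\n\\usepackage{hyperref}\n\\begin{document}\n"
        for line in text.split("\n"):
            latex += escape_latex(line) + "\\\\\n"
        latex += "\\end{document}"
        return BytesIO(latex.encode("utf-8"))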
-def generate_download_button(file, label, mime_type):
-    b64 = base64.b64encode(file.read()).decode()
-    return f"""
-    <a href="data:{mime_type};base64,{b64}" download="{label}">
-        📥 Download {label}
-    </a>
-    """
-
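generate_download_button hand-rolls a base64 data URI inside raw HTML. Streamlit's built-in st.download_button does the same job without embedding the whole file in the page markup; a sketch (pdf_file stands in for the BytesIO returned by generate_pdf):

    import streamlit as st
    from io import BytesIO

    pdf_file = BytesIO(b"%PDF- placeholder bytes")  # stand-in for generate_pdf(...)
    st.download_button(
        label="📥 Download Research_Report.pdf",
        data=pdf_file,
        file_name="Research_Report.pdf",
        mime="application/pdf",
    )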
-# --- Streamlit UI ---
-st.set_page_config("Deep Research Bot", layout="wide")
-
-with st.sidebar:
-    st.title("🧠 Deep Research Assistant")
-    topic = st.text_input("💡 Topic to research")
-    report_type = st.selectbox("📄 Type of report", [
-        "Summary - Short and fast (~2 min)",
-        "Detailed Report (~5 min)",
-        "Thorough Academic Research (~10 min)"
-    ])
-    tone = st.selectbox("🎯 Tone of the report", [
-        "Objective - Impartial and unbiased presentation of facts and findings",
-        "Persuasive - Advocating a specific point of view",
-        "Narrative - Storytelling tone for layperson readers"
-    ])
-    source_type = st.selectbox("🌐 Sources to include", ["Web Only", "Academic Only", "Hybrid"])
-    custom_domains = st.text_input("🔍 Query Domains (Optional)", placeholder="techcrunch.com, forbes.com")
-    research_button = st.button("Research")
-
-st.title("📑 Research Output")
-
-if research_button and topic:
-    try:
-        with st.status("🔍 Gathering data..."):
-            st.info("Fetching from sources...")
-
-            all_sources = []
-            citations = []
-
-            if source_type in ["Web Only", "Hybrid"]:
-                web_data = get_sources(topic, custom_domains)
-                for item in web_data:
-                    all_sources.append(item | {"source": "web"})
-
-            if source_type in ["Academic Only", "Hybrid"]:
-                arxiv_data = get_arxiv_papers(topic)
-                for item in arxiv_data:
-                    all_sources.append(item | {"source": "arxiv"})
-                semantic_data = get_semantic_papers(topic)
-                for item in semantic_data:
-                    all_sources.append(item | {"source": "semantic"})
-
-            merged = merge_duplicates(all_sources)
-            combined_text = ""
-            for m in merged:
-                combined_text += f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}...\n\n"
-                citations.append(generate_apa_citation(m['title'], m['url'], m['source']))
-
-        with st.spinner("✍️ Synthesizing report..."):
-            prompt = f"""
-            # Research Topic: {topic}
-            Tone: {tone}
-            Type: {report_type}
-            Sources:
-            {combined_text}
-            Write the report in academic markdown with paragraphs (use bullet points only when necessary). Include:
-            1. Introduction
-            2. Research Gap
-            3. Novel Insight
-            4. Application
-            5. Full Academic Writeup if Thorough Report
-            """
-            final_output = call_llm([{"role": "user", "content": prompt}])
-
-        st.markdown(f"### 📄 {report_type}")
-        st.markdown(final_output, unsafe_allow_html=True)
-
-        st.markdown("### 📚 Citations (APA Format)")
-        for cite in citations:
-            st.markdown(f"- {cite}")
-
-        if report_type == "Thorough Academic Research (~10 min)":
-            with st.spinner("📦 Preparing PDF and LaTeX..."):
-                pdf_file = generate_pdf(final_output)
-                latex_file = generate_latex(final_output)
-                st.markdown(generate_download_button(pdf_file, "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
-                st.markdown(generate_download_button(latex_file, "Research_Report.tex", "application/x-latex"), unsafe_allow_html=True)
-
-        overlaps = check_plagiarism(final_output, topic)
-        if overlaps:
-            st.warning("⚠️ Potential overlaps detected:")
-            for hit in overlaps:
-                st.markdown(f"- [{hit['title']}]({hit['url']})")
-        else:
-            st.success("✅ No major overlaps found.")
-
-    except Exception as e:
-        st.error(f"Error: {e}")
+        "summary":