Spaces:

Ani14
/

AutoReasearcher

Sleeping

App Files Files Community

Ani14 committed on Apr 22, 2025

Commit

bcae0c1

verified ·

1 Parent(s): 81824c2

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -96

app.py CHANGED Viewed

@@ -12,51 +12,15 @@ from fpdf import FPDF
 from io import BytesIO
 import base64
 from duckduckgo_search import DDGS
-# --- ENV & API KEYS ---
 load_dotenv()
 OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
 tavily = TavilyClient(api_key=TAVILY_API_KEY)
-# --- Streamlit UI ---
-st.set_page_config("Deep Research Assistant", layout="centered")
-# --- Mermaid.js for Mind Map ---
-st.markdown("""
-<script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
-<script>
-    mermaid.initialize({ startOnLoad: true });
-</script>
-""", unsafe_allow_html=True)
-# --- Theme ---
-st.markdown("""
-<style>
-    .stApp { background-color: #0f172a; color: white; }
-    h1, h2, h3 { color: #facc15; }
-</style>
-""", unsafe_allow_html=True)
-# --- Session State Initialization ---
-if "last_report" not in st.session_state:
-    st.session_state.last_report = ""
-if "mindmap_triggered" not in st.session_state:
-    st.session_state.mindmap_triggered = False
-if "followup_question" not in st.session_state:
-    st.session_state.followup_question = ""
-# --- Sidebar Inputs ---
-with st.sidebar:
-    st.title("🧠 Deep Research Assistant")
-    topic = st.text_input("🔍 Enter your research topic")
-    report_type = st.selectbox("📄 Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
-    tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
-    source_type = st.selectbox("📚 Sources", ["Web Only", "Academic Only", "Hybrid"])
-    custom_domains = st.text_input("🌐 Optional Web Domains", placeholder="example.com, forbes.com")
-    research_button = st.button("🚀 Run Deep Research")
-# --- LLM Call ---
 def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=3500, temperature=0.7):
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
@@ -87,15 +51,69 @@ def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=3
                         except json.JSONDecodeError:
                             pass
-# --- Source + PDF Helpers ---
-def get_sources(query):
     response = tavily.search(query=query, search_depth="advanced", max_results=10)
     return [{
-        "title": r["title"],
-        "url": r["url"],
-        "snippet": r.get("content", ""),
-        "source": "web"
-    } for r in response.get("results", [])]
 def generate_pdf(text):
     pdf = FPDF()
@@ -116,47 +134,74 @@ def generate_pdf(text):
 def generate_download_button(file, label, mime_type):
     b64 = base64.b64encode(file.read()).decode()
-    return f"""
-        <a href="data:{mime_type};base64,{b64}" download="{label}">
-            📥 Download {label}
-        </a>
-    """
-# --- Output Area ---
 st.title("📘 Research Output")
 if research_button and topic:
-    with st.status("🔍 Researching..."):
-        sources = get_sources(topic)
-        citations = [f"{s['title']} ({datetime.datetime.now().year}). {s['source']}. {s['url']}" for s in sources]
-        source_text = "\n\n".join([f"- [{s['title']}]({s['url']})\n> {s['snippet']}" for s in sources])
-        prompt = f"""
 You are an expert research assistant.
 1. Analyze the following sources.
 2. Identify research gaps and propose a novel topic.
 3. Write a {report_type.lower()} in a {tone.lower()} tone.
 Sources:
-{source_text}
-Citations:
 {chr(10).join(citations)}
-        """
-        st.subheader(f"📝 {report_type} on '{topic}'")
-        output_placeholder = st.empty()
-        final_output = ""
-        for chunk in call_llm([{"role": "user", "content": prompt}]):
-            final_output += chunk
-            output_placeholder.markdown(final_output, unsafe_allow_html=True)
-        st.session_state.last_report = final_output
-        st.subheader("📄 Downloads")
-        st.markdown(generate_download_button(generate_pdf(final_output), "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
-# --- Mind Map ---
 st.subheader("🧭 Visual Mind Map")
 if st.button("🗺 Generate Mind Map"):
     st.session_state.mindmap_triggered = True
@@ -164,35 +209,18 @@ if st.button("🗺 Generate Mind Map"):
 if st.session_state.mindmap_triggered and st.session_state.last_report:
     try:
         mindmap_prompt = [
-            {"role": "system", "content": "Convert this research report into a mermaid.js mind map."},
             {"role": "user", "content": st.session_state.last_report}
         ]
         mindmap_code = ""
         for chunk in call_llm(mindmap_prompt):
             mindmap_code += chunk
-        mindmap_code = mindmap_code.replace("```mermaid", "").replace("```", "").strip()
-        st.markdown(f"<div class='mermaid'>{mindmap_code}</div>", unsafe_allow_html=True)
-    except Exception as e:
-        st.error(f"❌ Mind map error: {e}")
     finally:
         st.session_state.mindmap_triggered = False
-# --- Follow-Up ---
-st.subheader("💬 Ask a Follow-Up")
-follow_input = st.text_input("Ask a question about the report:")
-if st.button("🔄 Submit Follow-Up") and follow_input:
-    st.session_state.followup_question = follow_input
-if st.session_state.followup_question and st.session_state.last_report:
-    follow_prompt = [
-        {"role": "system", "content": "You are a helpful academic assistant."},
-        {"role": "user", "content": st.session_state.last_report},
-        {"role": "user", "content": st.session_state.followup_question}
-    ]
-    follow_output = ""
-    follow_box = st.empty()
-    for chunk in call_llm(follow_prompt):
-        follow_output += chunk
-        follow_box.markdown(follow_output, unsafe_allow_html=True)
-    st.session_state.followup_question = ""

 from io import BytesIO
 import base64
 from duckduckgo_search import DDGS
+import re
+# Load environment variables
 load_dotenv()
 OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
 tavily = TavilyClient(api_key=TAVILY_API_KEY)
+# --- Helper Functions ---
 def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=3500, temperature=0.7):
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
                         except json.JSONDecodeError:
                             pass
+def get_sources(topic, domains=None):
+    query = topic
+    if domains:
+        domain_filters = [d.strip() for d in domains.split(",") if d.strip()]
+        query += " site:" + " OR site:".join(domain_filters)
     response = tavily.search(query=query, search_depth="advanced", max_results=10)
+    results = []
+    for r in response.get("results", []):
+        image_url = r.get("image_url")
+        if not image_url:
+            try:
+                images = [img["image"] for img in DDGS().images(r["title"], max_results=1)]
+                image_url = images[0] if images else None
+            except:
+                image_url = None
+        results.append({
+            "title": r["title"],
+            "url": r["url"],
+            "snippet": r.get("content", ""),
+            "image_url": image_url,
+            "source": "web"
+        })
+    return results
+def get_arxiv_papers(query):
+    from urllib.parse import quote_plus
+    url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results=5"
+    feed = feedparser.parse(url)
     return [{
+        "title": e.title,
+        "summary": e.summary.replace("\n", " ").strip(),
+        "url": next((l.href for l in e.links if l.type == "application/pdf"), ""),
+        "source": "arxiv"
+    } for e in feed.entries]
+def get_semantic_papers(query):
+    try:
+        url = "https://api.semanticscholar.org/graph/v1/paper/search"
+        params = {"query": query, "limit": 5, "fields": "title,abstract,url"}
+        response = requests.get(url, params=params)
+        papers = response.json().get("data", [])
+        return [{
+            "title": p.get("title"),
+            "summary": p.get("abstract", "No abstract available"),
+            "url": p.get("url"),
+            "source": "semantic"
+        } for p in papers]
+    except:
+        return []
+def generate_apa_citation(title, url, source):
+    year = datetime.datetime.now().year
+    label = {"arxiv": "*arXiv*", "semantic": "*Semantic Scholar*", "web": "*Web Source*"}.get(source, "*Web*")
+    return f"{title}. ({year}). {label}. {url}"
+def merge_duplicates(entries):
+    unique = []
+    seen_titles = []
+    for entry in entries:
+        if all(fuzz.token_set_ratio(entry['title'], seen) < 90 for seen in seen_titles):
+            unique.append(entry)
+            seen_titles.append(entry['title'])
+    return unique
 def generate_pdf(text):
     pdf = FPDF()
 def generate_download_button(file, label, mime_type):
     b64 = base64.b64encode(file.read()).decode()
+    return f"""<a href="data:{mime_type};base64,{b64}" download="{label}">📥 Download {label}</a>"""
+# --- Streamlit UI ---
+st.set_page_config("Deep Research Assistant", layout="centered")
+if "last_report" not in st.session_state:
+    st.session_state.last_report = ""
+if "mindmap_triggered" not in st.session_state:
+    st.session_state.mindmap_triggered = False
+# Mermaid for mind map
+st.markdown("""
+<script src="https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js"></script>
+<script>mermaid.initialize({ startOnLoad: true });</script>
+<style>
+.stApp { background-color: #0f172a; color: white; }
+h1, h2, h3 { color: #facc15; }
+</style>
+""", unsafe_allow_html=True)
+with st.sidebar:
+    st.title("🧠 Deep Research Assistant")
+    topic = st.text_input("🔍 Enter your research topic")
+    report_type = st.selectbox("📄 Report Type", ["Summary", "Detailed Report", "Thorough Academic Research"])
+    tone = st.selectbox("🎯 Tone", ["Objective", "Persuasive", "Narrative"])
+    source_type = st.selectbox("📚 Sources", ["Web Only", "Academic Only", "Hybrid"])
+    custom_domains = st.text_input("🌐 Optional Web Domains", placeholder="example.com, forbes.com")
+    research_button = st.button("🚀 Run Deep Research")
 st.title("📘 Research Output")
 if research_button and topic:
+    sources = []
+    if source_type in ["Web Only", "Hybrid"]:
+        sources += get_sources(topic, custom_domains)
+    if source_type in ["Academic Only", "Hybrid"]:
+        sources += get_arxiv_papers(topic)
+        sources += get_semantic_papers(topic)
+    merged = merge_duplicates(sources)
+    citations = [generate_apa_citation(m['title'], m['url'], m['source']) for m in merged]
+    combined_text = "\n\n".join([f"- [{m['title']}]({m['url']})\n> {m.get('snippet', m.get('summary', ''))[:300]}..." for m in merged])
+    prompt = f"""
 You are an expert research assistant.
 1. Analyze the following sources.
 2. Identify research gaps and propose a novel topic.
 3. Write a {report_type.lower()} in a {tone.lower()} tone.
 Sources:
+{combined_text}
+APA Citations:
 {chr(10).join(citations)}
+    """
+    st.subheader(f"📝 {report_type} on '{topic}'")
+    full_output = ""
+    for chunk in call_llm([{"role": "user", "content": prompt}]):
+        full_output += chunk
+        st.markdown(full_output, unsafe_allow_html=True)
+    st.session_state.last_report = full_output
+    st.subheader("📄 Downloads")
+    st.markdown(generate_download_button(generate_pdf(full_output), "Research_Report.pdf", "application/pdf"), unsafe_allow_html=True)
+# 🔁 Mind Map Section
 st.subheader("🧭 Visual Mind Map")
 if st.button("🗺 Generate Mind Map"):
     st.session_state.mindmap_triggered = True
 if st.session_state.mindmap_triggered and st.session_state.last_report:
     try:
         mindmap_prompt = [
+            {"role": "system", "content": "You are a mermaid.js expert. Convert the given research report into a valid mermaid.js mind map. Only return the code between ```mermaid and ```."},
             {"role": "user", "content": st.session_state.last_report}
         ]
         mindmap_code = ""
         for chunk in call_llm(mindmap_prompt):
             mindmap_code += chunk
+        match = re.search(r"```mermaid(.*?)```", mindmap_code, re.DOTALL)
+        if match:
+            diagram = match.group(1).strip()
+            st.markdown(f"<div class='mermaid'>{diagram}</div>", unsafe_allow_html=True)
+        else:
+            st.warning("⚠️ Mermaid diagram not detected. Try again.")
     finally:
         st.session_state.mindmap_triggered = False