Update app.py

app.py CHANGED
@@ -3,16 +3,14 @@ import streamlit as st
 import requests
 import feedparser
 import datetime
-import base64
-import tempfile
+from fuzzywuzzy import fuzz
 from dotenv import load_dotenv
 from duckduckgo_search import DDGS
-from fuzzywuzzy import fuzz
 
 load_dotenv()
 OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
 
-# --- LLM
+# --- Call OpenRouter LLM ---
 def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
     url = "https://openrouter.ai/api/v1/chat/completions"
     headers = {
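Note: call_llm keeps OpenRouter's OpenAI-style chat schema, so a caller builds a plain list of role/content dicts. A minimal usage sketch; the prompt text and parameter values below are placeholders, not from this repo:

    # Hypothetical caller of call_llm as defined above (placeholder prompt).
    messages = [
        {"role": "system", "content": "You are a concise research assistant."},
        {"role": "user", "content": "Summarize recent work on retrieval-augmented generation."},
    ]
    reply = call_llm(messages, max_tokens=512, temperature=0.3)
    print(reply)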
@@ -26,13 +24,34 @@ def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2
         "max_tokens": max_tokens,
         "temperature": temperature
     }
-    response = requests.post(url, headers=headers, json=data)
-    result = response.json()
+    try:
+        response = requests.post(url, headers=headers, json=data)
+        result = response.json()
+    except Exception as e:
+        raise RuntimeError(f"Failed to connect or parse response: {e}")
     if response.status_code != 200:
         raise RuntimeError(result.get("error", {}).get("message", "LLM API error"))
+    if "choices" not in result:
+        raise RuntimeError(f"Invalid response: {result}")
     return result["choices"][0]["message"]["content"]
 
-# ---
+# --- Plagiarism Check ---
+def check_plagiarism(text, query, threshold=70):
+    web_results = search_duckduckgo(query, max_results=5)
+    plagiarized_snippets = []
+    for result in web_results:
+        snippet = result.get("snippet", "")
+        similarity = fuzz.token_set_ratio(text, snippet)
+        if similarity >= threshold:
+            plagiarized_snippets.append({
+                "title": result["title"],
+                "url": result["url"],
+                "snippet": snippet,
+                "similarity": similarity
+            })
+    return plagiarized_snippets
+
+# --- Source Utilities ---
 def get_arxiv_papers(query, max_results=3):
     from urllib.parse import quote_plus
     url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results={max_results}"
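The threshold=70 default in check_plagiarism is a fuzzywuzzy token_set_ratio score, an integer from 0 to 100. token_set_ratio ignores word order and duplicated tokens, so reworded snippets that reuse the same vocabulary still score high. A quick sketch with invented strings:

    from fuzzywuzzy import fuzz

    # Same vocabulary, different order: scores near the top of the scale.
    print(fuzz.token_set_ratio(
        "large language models for literature review",
        "literature review with large language models"))

    # Unrelated text: scores far below the 70 cutoff.
    print(fuzz.token_set_ratio(
        "large language models",
        "volcanic soil chemistry"))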
@@ -67,32 +86,37 @@ def get_image_urls(query, max_images=3):
         return [img["image"] for img in ddgs.images(query, max_results=max_images)]
 
 def generate_apa_citation(title, url, source=""):
+    current_year = datetime.datetime.now().year
     if source == "arxiv":
-        return f"{title}. ({
+        return f"{title}. ({current_year}). *arXiv*. {url}"
     elif source == "semantic":
-        return f"{title}. ({
+        return f"{title}. ({current_year}). *Semantic Scholar*. {url}"
     elif source == "web":
-        return f"{title}. ({
+        return f"{title}. ({current_year}). *Web Source*. {url}"
+    else:
+        return f"{title}. ({current_year}). {url}"
 
-# ---
+# --- Research Agent ---
 def autonomous_research_agent(topic):
     arxiv = get_arxiv_papers(topic)
     scholar = get_semantic_scholar_papers(topic)
     web = search_duckduckgo(topic)
     images = get_image_urls(topic)
 
-        citations.append(generate_apa_citation(p['title'], p['url'], source))
-    return md, citations
+    arxiv_md, arxiv_citations = "", []
+    for p in arxiv:
+        arxiv_md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
+        arxiv_citations.append(generate_apa_citation(p["title"], p["url"], source="arxiv"))
 
+    scholar_md, scholar_citations = "", []
+    for p in scholar:
+        scholar_md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
+        scholar_citations.append(generate_apa_citation(p["title"], p["url"], source="semantic"))
+
+    web_md, web_citations = "", []
+    for w in web:
+        web_md += f"- [{w['title']}]({w['url']})\n> {w['snippet']}\n\n"
+        web_citations.append(generate_apa_citation(w["title"], w["url"], source="web"))
 
     prompt = f"""
 # Research Topic: {topic}
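One caveat on the rewritten generate_apa_citation: it stamps the year the app runs, not the paper's publication year, so the output only approximates APA style. A sketch of the return value, using a real arXiv paper as the example input:

    # The output year is whatever datetime.datetime.now().year returns at run time.
    generate_apa_citation(
        "Attention Is All You Need",
        "https://arxiv.org/abs/1706.03762",
        source="arxiv")
    # -> 'Attention Is All You Need. (2025). *arXiv*. https://arxiv.org/abs/1706.03762'

Producing a true APA citation would require the fetchers to return each paper's publication year.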
@@ -113,7 +137,7 @@ Now synthesize this information into:
 """
     response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)
 
-    # Append
+    # Append Sources
     response += "\n\n---\n### Sources Cited\n"
     if arxiv_md:
         response += "**ArXiv:**\n" + arxiv_md
@@ -122,37 +146,14 @@ Now synthesize this information into:
     if web_md:
         response += "**Web:**\n" + web_md
 
+    # APA Citations Section
+    all_citations = arxiv_citations + scholar_citations + web_citations
+    response += "\n---\n### 📚 APA Citations\n"
+    for cite in all_citations:
+        response += f"- {cite}\n"
 
     return response, images
 
-# --- Export Helper ---
-def export_file(content, export_format):
-    filename_base = f"research_output_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
-    if export_format == "Markdown":
-        return content, f"{filename_base}.md"
-    elif export_format == "LaTeX":
-        tex = f"\\documentclass{{article}}\n\\begin{{document}}\n{content}\n\\end{{document}}"
-        return tex, f"{filename_base}.tex"
-    elif export_format == "PDF":
-        try:
-            from fpdf import FPDF
-        except ImportError:
-            st.error("Install fpdf with: `pip install fpdf`")
-            return None, None
-        pdf = FPDF()
-        pdf.add_page()
-        pdf.set_auto_page_break(auto=True, margin=15)
-        pdf.set_font("Arial", size=12)
-        for line in content.split('\n'):
-            pdf.multi_cell(0, 10, line)
-        path = tempfile.mktemp(suffix=".pdf")
-        pdf.output(path)
-        with open(path, "rb") as f:
-            return f.read(), f"{filename_base}.pdf"
-        return None, None
-
 # --- Streamlit UI ---
 st.set_page_config("Autonomous Research Assistant", layout="wide")
 st.title("🤖 Autonomous AI Research Assistant")
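Reviewer note on the deleted export helper: its PDF branch used tempfile.mktemp, which is deprecated because the returned path can be claimed by another process before the file is written. If the helper returns in a later commit, a safer sketch, assuming the fpdf package the old code imported is installed:

    import tempfile
    from fpdf import FPDF

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, "placeholder content")
    # NamedTemporaryFile creates the file atomically, unlike mktemp().
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        pdf.output(tmp.name)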
@@ -161,46 +162,45 @@ if "chat_history" not in st.session_state:
     st.session_state.chat_history = []
 
 topic = st.text_input("Enter a research topic:")
-if st.button("Run Research Agent")
+if st.button("Run Research Agent"):
     with st.spinner("Gathering sources & thinking..."):
         try:
            response, images = autonomous_research_agent(topic)
-            st.session_state.chat_history.append({"role": "user", "content": topic})
-            st.session_state.chat_history.append({"role": "assistant", "content": response})
 
+            # Display images
             if images:
                 st.subheader("🖼️ Relevant Images")
                 st.image(images, width=300)
 
+            # Display markdown response
+            st.session_state.chat_history.append({"role": "user", "content": topic})
+            st.session_state.chat_history.append({"role": "assistant", "content": response})
             st.markdown(response)
 
-            #
-            if
-                    href = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">📥 Download {filename}</a>'
-                    st.markdown(href, unsafe_allow_html=True)
-                except Exception as e:
-                    st.error(f"Export failed: {e}")
+            # Check for plagiarism
+            plagiarism_hits = check_plagiarism(response, topic)
+            if plagiarism_hits:
+                st.warning("⚠️ Potential overlap with existing web content detected.")
+                st.subheader("🕵️ Plagiarism Check Results")
+                for hit in plagiarism_hits:
+                    st.markdown(f"**{hit['title']}** - [{hit['url']}]({hit['url']})")
+                    st.markdown(f"> _Similarity: {hit['similarity']}%_\n\n{hit['snippet']}")
+            else:
+                st.success("✅ No significant overlaps found. Content appears original.")
         except Exception as e:
-            st.error(f"
+            st.error(f"Failed: {e}")
 
 # --- Follow-up Chat ---
 st.divider()
 st.subheader("💬 Follow-up Q&A")
 followup = st.text_input("Ask a follow-up question:")
-if st.button("Ask")
+if st.button("Ask"):
+    if followup:
+        try:
+            chat = st.session_state.chat_history + [{"role": "user", "content": followup}]
+            answer = call_llm(chat, max_tokens=1500)
+            st.session_state.chat_history.append({"role": "user", "content": followup})
+            st.session_state.chat_history.append({"role": "assistant", "content": answer})
+            st.markdown(answer)
+        except Exception as e:
+            st.error(f"Follow-up error: {e}")
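For reference, a sketch of the conversation list the follow-up handler replays into call_llm; the contents below are placeholders, and the last line mirrors the chat = st.session_state.chat_history + [...] statement above:

    # Shape of st.session_state.chat_history after one research run
    # and one follow-up (placeholder contents):
    chat_history = [
        {"role": "user", "content": "graph neural networks"},
        {"role": "assistant", "content": "# Research Topic: graph neural networks ..."},
        {"role": "user", "content": "Which benchmarks are standard?"},
        {"role": "assistant", "content": "..."},
    ]
    followup = "How do they scale?"
    chat = chat_history + [{"role": "user", "content": followup}]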
|