# AutoReasearcher / app.py
import os
import streamlit as st
import requests
import feedparser
import datetime
from dotenv import load_dotenv
from duckduckgo_search import DDGS
load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
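# The OpenRouter key is read from the environment (or a local .env file via load_dotenv);
# without it, the chat-completion requests in call_llm will be rejected by the API.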
# --- Call OpenRouter LLM ---
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
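    """Send a chat-completion request to OpenRouter and return the assistant's reply text.

    `messages` follows the OpenAI-style chat format: a list of {"role": ..., "content": ...} dicts.
    Raises RuntimeError if the request fails, the API returns a non-200 status, or the
    response contains no "choices" field.
    """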
url = "https://openrouter.ai/api/v1/chat/completions"
headers = {
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"Content-Type": "application/json",
"X-Title": "Autonomous Research Assistant"
}
data = {
"model": model,
"messages": messages,
"max_tokens": max_tokens,
"temperature": temperature
}
try:
response = requests.post(url, headers=headers, json=data)
result = response.json()
except Exception as e:
raise RuntimeError(f"Failed to connect or parse response: {e}")
if response.status_code != 200:
raise RuntimeError(result.get("error", {}).get("message", "LLM API error"))
if "choices" not in result:
raise RuntimeError(f"Invalid response: {result}")
return result["choices"][0]["message"]["content"]
# --- Source Utilities ---
def get_arxiv_papers(query, max_results=3):
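    """Search the arXiv Atom API and return up to `max_results` entries as
    {"title", "summary", "url"} dicts; "url" is the PDF link when one is listed, else "".
    """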
    from urllib.parse import quote_plus
    url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results={max_results}"
    feed = feedparser.parse(url)
    return [{
        "title": e.title or "Untitled",
        "summary": (e.summary or "No summary available").replace("\n", " ").strip(),
        "url": next((l.href for l in e.links if l.get("type") == "application/pdf"), "")
    } for e in feed.entries]
def get_semantic_scholar_papers(query, max_results=3):
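    """Search the Semantic Scholar Graph API and return {"title", "summary", "url"} dicts.
    Returns an empty list when the response carries no "data" field.
    """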
url = "https://api.semanticscholar.org/graph/v1/paper/search"
params = {"query": query, "limit": max_results, "fields": "title,abstract,url"}
response = requests.get(url, params=params)
papers = response.json().get("data", [])
return [{
"title": p.get("title") or "Untitled",
"summary": (p.get("abstract") or "No abstract available").strip(),
"url": p.get("url", "")
} for p in papers]
def search_duckduckgo(query, max_results=3):
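    """Run a DuckDuckGo text search and return {"title", "snippet", "url"} dicts."""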
    with DDGS() as ddgs:
        return [{
            "title": r["title"] or "Untitled",
            "snippet": r["body"] or "",
            "url": r["href"] or ""
        } for r in ddgs.text(query, max_results=max_results)]
def get_image_urls(query, max_images=3):
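    """Return up to `max_images` image URLs from a DuckDuckGo image search."""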
    with DDGS() as ddgs:
        return [img["image"] for img in ddgs.images(query, max_results=max_images)]
def generate_apa_citation(title, url, source=""):
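    """Build a lightweight APA-style citation string. Note: the current year is used in
    place of the real publication date, which the source helpers above do not fetch.
    """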
    current_year = datetime.datetime.now().year
    if source == "arxiv":
        return f"{title}. ({current_year}). *arXiv*. {url}"
    elif source == "semantic":
        return f"{title}. ({current_year}). *Semantic Scholar*. {url}"
    elif source == "web":
        return f"{title}. ({current_year}). *Web Source*. {url}"
    else:
        return f"{title}. ({current_year}). {url}"
# --- Research Agent ---
def autonomous_research_agent(topic):
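    """Gather sources from arXiv, Semantic Scholar, and DuckDuckGo, ask the LLM to
    synthesize them into a research article, then append source lists and APA citations.
    Returns (markdown_report, image_urls).
    """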
    arxiv = get_arxiv_papers(topic)
    scholar = get_semantic_scholar_papers(topic)
    web = search_duckduckgo(topic)
    images = get_image_urls(topic)

    arxiv_md, arxiv_citations = "", []
    for p in arxiv:
        arxiv_md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
        arxiv_citations.append(generate_apa_citation(p["title"], p["url"], source="arxiv"))

    scholar_md, scholar_citations = "", []
    for p in scholar:
        scholar_md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
        scholar_citations.append(generate_apa_citation(p["title"], p["url"], source="semantic"))

    web_md, web_citations = "", []
    for w in web:
        web_md += f"- [{w['title']}]({w['url']})\n> {w['snippet']}\n\n"
        web_citations.append(generate_apa_citation(w["title"], w["url"], source="web"))

    prompt = f"""
# Research Topic: {topic}
## ArXiv:
{arxiv_md}
## Semantic Scholar:
{scholar_md}
## Web Insights:
{web_md}
Now synthesize this information into:
1. A research gap
2. A novel research direction
3. A full markdown-formatted research article (continuous, no section labels, academic tone)
"""
    response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)

    # Append Sources
    response += "\n\n---\n### Sources Cited\n"
    if arxiv_md:
        response += "**ArXiv:**\n" + arxiv_md
    if scholar_md:
        response += "**Semantic Scholar:**\n" + scholar_md
    if web_md:
        response += "**Web:**\n" + web_md

    # APA Citations Section
    all_citations = arxiv_citations + scholar_citations + web_citations
    response += "\n---\n### πŸ“š APA Citations\n"
    for cite in all_citations:
        response += f"- {cite}\n"
    return response, images
# --- Streamlit UI ---
st.set_page_config("Autonomous Research Assistant", layout="wide")
st.title("πŸ€– Autonomous AI Research Assistant")
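# Streamlit re-runs this script on every interaction, so the conversation is kept in
# session_state to give follow-up questions their full context.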
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
topic = st.text_input("Enter a research topic:")
if st.button("Run Research Agent"):
    with st.spinner("Gathering sources & thinking..."):
        try:
            response, images = autonomous_research_agent(topic)

            # Display images
            if images:
                st.subheader("πŸ–ΌοΈ Relevant Images")
                st.image(images, width=300)

            # Display markdown response
            st.session_state.chat_history.append({"role": "user", "content": topic})
            st.session_state.chat_history.append({"role": "assistant", "content": response})
            st.markdown(response)
        except Exception as e:
            st.error(f"Failed: {e}")
# --- Follow-up Chat ---
st.divider()
st.subheader("πŸ’¬ Follow-up Q&A")
followup = st.text_input("Ask a follow-up question:")
if st.button("Ask"):
    if followup:
        try:
            chat = st.session_state.chat_history + [{"role": "user", "content": followup}]
            answer = call_llm(chat, max_tokens=1500)
            st.session_state.chat_history.append({"role": "user", "content": followup})
            st.session_state.chat_history.append({"role": "assistant", "content": answer})
            st.markdown(answer)
        except Exception as e:
            st.error(f"Follow-up error: {e}")