import os
import datetime

import streamlit as st
import requests
import feedparser
from dotenv import load_dotenv
from duckduckgo_search import DDGS

load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# Timeout (seconds) for every outbound HTTP call. Without one, `requests`
# can block the Streamlit worker forever on a stalled connection.
REQUEST_TIMEOUT = 30


# --- Call OpenRouter LLM ---
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
    """Send a chat-completion request to OpenRouter and return the reply text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` chat messages.
        model: OpenRouter model identifier.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Returns:
        The assistant message content (str).

    Raises:
        RuntimeError: On connection failure, a non-200 HTTP status, or a
            malformed API response.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "X-Title": "Autonomous Research Assistant",
    }
    data = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    try:
        response = requests.post(url, headers=headers, json=data, timeout=REQUEST_TIMEOUT)
    except Exception as e:
        raise RuntimeError(f"Failed to connect or parse response: {e}") from e

    # Decode the body separately from the request: an error response may
    # carry a non-JSON body (e.g. an HTML gateway page), and we still want
    # to surface the HTTP failure rather than a JSON-decode traceback.
    try:
        result = response.json()
    except Exception as e:
        if response.status_code != 200:
            raise RuntimeError("LLM API error")
        raise RuntimeError(f"Failed to connect or parse response: {e}") from e

    if response.status_code != 200:
        raise RuntimeError(result.get("error", {}).get("message", "LLM API error"))
    if "choices" not in result:
        raise RuntimeError(f"Invalid response: {result}")
    return result["choices"][0]["message"]["content"]


# --- Source Utilities ---
def get_arxiv_papers(query, max_results=3):
    """Query the arXiv Atom API and return up to *max_results* papers.

    Each entry is a dict with ``title``, ``summary`` (whitespace-normalized),
    and ``url`` (the PDF link when present, else "").
    """
    from urllib.parse import quote_plus
    url = (
        f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}"
        f"&start=0&max_results={max_results}"
    )
    feed = feedparser.parse(url)
    return [{
        "title": e.title or "Untitled",
        "summary": (e.summary or "No summary available").replace("\n", " ").strip(),
        # Prefer the PDF link among the entry's alternate links.
        "url": next((l.href for l in e.links if l.type == "application/pdf"), ""),
    } for e in feed.entries]


def get_semantic_scholar_papers(query, max_results=3):
    """Search Semantic Scholar's Graph API for papers matching *query*.

    Returns a list of dicts with ``title``, ``summary`` (abstract), and ``url``.
    Missing fields fall back to placeholder strings / "".
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": max_results, "fields": "title,abstract,url"}
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    papers = response.json().get("data", [])
    return [{
        "title": p.get("title") or "Untitled",
        "summary": (p.get("abstract") or "No abstract available").strip(),
        "url": p.get("url", ""),
    } for p in papers]


def search_duckduckgo(query, max_results=3):
    """Run a DuckDuckGo text search and return title/snippet/url dicts.

    Uses ``.get`` so a result missing a field yields the fallback value
    instead of raising ``KeyError``.
    """
    with DDGS() as ddgs:
        return [{
            "title": r.get("title") or "Untitled",
            "snippet": r.get("body") or "",
            "url": r.get("href") or "",
        } for r in ddgs.text(query, max_results=max_results)]


def get_image_urls(query, max_images=3):
    """Return up to *max_images* image URLs from a DuckDuckGo image search."""
    with DDGS() as ddgs:
        return [img["image"] for img in ddgs.images(query, max_results=max_images)]


def generate_apa_citation(title, url, source=""):
    """Build a lightweight APA-style citation string.

    ``source`` selects the venue label: "arxiv", "semantic", "web", or
    anything else for a bare citation. The year is the current local year
    (the true publication year is not available from the collected metadata).
    """
    current_year = datetime.datetime.now().year
    if source == "arxiv":
        return f"{title}. ({current_year}). *arXiv*. {url}"
    elif source == "semantic":
        return f"{title}. ({current_year}). *Semantic Scholar*. {url}"
    elif source == "web":
        return f"{title}. ({current_year}). *Web Source*. {url}"
    else:
        return f"{title}. ({current_year}). {url}"


# --- Research Agent ---
def autonomous_research_agent(topic):
    """Gather sources on *topic*, synthesize an article via the LLM.

    Collects arXiv, Semantic Scholar, and DuckDuckGo results, prompts the
    LLM to synthesize them, and appends source lists plus APA citations.

    Returns:
        A tuple ``(markdown_response, image_urls)``.
    """
    arxiv = get_arxiv_papers(topic)
    scholar = get_semantic_scholar_papers(topic)
    web = search_duckduckgo(topic)
    images = get_image_urls(topic)

    arxiv_md, arxiv_citations = "", []
    for p in arxiv:
        arxiv_md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
        arxiv_citations.append(generate_apa_citation(p["title"], p["url"], source="arxiv"))

    scholar_md, scholar_citations = "", []
    for p in scholar:
        scholar_md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
        scholar_citations.append(generate_apa_citation(p["title"], p["url"], source="semantic"))

    web_md, web_citations = "", []
    for w in web:
        web_md += f"- [{w['title']}]({w['url']})\n> {w['snippet']}\n\n"
        web_citations.append(generate_apa_citation(w["title"], w["url"], source="web"))

    prompt = f"""
# Research Topic: {topic}

## ArXiv:
{arxiv_md}

## Semantic Scholar:
{scholar_md}

## Web Insights:
{web_md}

Now synthesize this information into:
1. A research gap
2. A novel research direction
3. A full markdown-formatted research article (continuous, no section labels, academic tone)
"""
    response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)

    # Append Sources
    response += "\n\n---\n### Sources Cited\n"
    if arxiv_md:
        response += "**ArXiv:**\n" + arxiv_md
    if scholar_md:
        response += "**Semantic Scholar:**\n" + scholar_md
    if web_md:
        response += "**Web:**\n" + web_md

    # APA Citations Section
    all_citations = arxiv_citations + scholar_citations + web_citations
    response += "\n---\n### 📚 APA Citations\n"
    for cite in all_citations:
        response += f"- {cite}\n"

    return response, images


# --- Streamlit UI ---
st.set_page_config("Autonomous Research Assistant", layout="wide")
st.title("🤖 Autonomous AI Research Assistant")

if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

topic = st.text_input("Enter a research topic:")

if st.button("Run Research Agent"):
    # Guard against an empty topic: don't fire three API searches on "".
    if not topic.strip():
        st.warning("Please enter a research topic first.")
    else:
        with st.spinner("Gathering sources & thinking..."):
            try:
                response, images = autonomous_research_agent(topic)
                # Display images
                if images:
                    st.subheader("🖼️ Relevant Images")
                    st.image(images, width=300)
                # Display markdown response
                st.session_state.chat_history.append({"role": "user", "content": topic})
                st.session_state.chat_history.append({"role": "assistant", "content": response})
                st.markdown(response)
            except Exception as e:
                st.error(f"Failed: {e}")

# --- Follow-up Chat ---
st.divider()
st.subheader("💬 Follow-up Q&A")
followup = st.text_input("Ask a follow-up question:")

if st.button("Ask"):
    if followup:
        try:
            chat = st.session_state.chat_history + [{"role": "user", "content": followup}]
            answer = call_llm(chat, max_tokens=1500)
            st.session_state.chat_history.append({"role": "user", "content": followup})
            st.session_state.chat_history.append({"role": "assistant", "content": answer})
            st.markdown(answer)
        except Exception as e:
            st.error(f"Follow-up error: {e}")