# AutoReasearcher / app.py
import os
import streamlit as st
import requests
import feedparser
import datetime
from dotenv import load_dotenv
from duckduckgo_search import DDGS
load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")
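# The OpenRouter key is read from the environment (or a local .env file via load_dotenv);
# without it, the chat-completion requests in call_llm will be rejected by the API.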
# --- Call OpenRouter LLM ---
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
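    """Send a chat-completion request to OpenRouter and return the assistant's reply text.

    `messages` follows the OpenAI-style chat format: a list of {"role": ..., "content": ...} dicts.
    Raises RuntimeError if the request fails, the API returns a non-200 status, or the
    response contains no "choices" field.
    """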
url = "https://openrouter.ai/api/v1/chat/completions"
headers = {
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
"Content-Type": "application/json",
"X-Title": "Autonomous Research Assistant"
}
data = {
"model": model,
"messages": messages,
"max_tokens": max_tokens,
"temperature": temperature
}
try:
response = requests.post(url, headers=headers, json=data)
result = response.json()
except Exception as e:
raise RuntimeError(f"Failed to connect or parse response: {e}")
if response.status_code != 200:
raise RuntimeError(result.get("error", {}).get("message", "LLM API error"))
if "choices" not in result:
raise RuntimeError(f"Invalid response: {result}")
return result["choices"][0]["message"]["content"]
# --- Source Utilities ---
def get_arxiv_papers(query, max_results=3):
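    """Search the arXiv Atom API and return up to `max_results` entries as
    {"title", "summary", "url"} dicts; "url" is the PDF link when one is listed, else "".
    """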
    from urllib.parse import quote_plus
    url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results={max_results}"
    feed = feedparser.parse(url)
    return [{
        "title": e.title or "Untitled",
        "summary": (e.summary or "No summary available").replace("\n", " ").strip(),
        "url": next((l.href for l in e.links if l.get("type") == "application/pdf"), "")
    } for e in feed.entries]
def get_semantic_scholar_papers(query, max_results=3):
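    """Search the Semantic Scholar Graph API and return {"title", "summary", "url"} dicts.
    Returns an empty list when the response carries no "data" field.
    """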
url = "https://api.semanticscholar.org/graph/v1/paper/search"
params = {"query": query, "limit": max_results, "fields": "title,abstract,url"}
response = requests.get(url, params=params)
papers = response.json().get("data", [])
return [{
"title": p.get("title") or "Untitled",
"summary": (p.get("abstract") or "No abstract available").strip(),
"url": p.get("url", "")
} for p in papers]
def search_duckduckgo(query, max_results=3):
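    """Run a DuckDuckGo text search and return {"title", "snippet", "url"} dicts."""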
    with DDGS() as ddgs:
        return [{
            "title": r["title"] or "Untitled",
            "snippet": r["body"] or "",
            "url": r["href"] or ""
        } for r in ddgs.text(query, max_results=max_results)]
def get_image_urls(query, max_images=3):
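    """Return up to `max_images` image URLs from a DuckDuckGo image search."""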
    with DDGS() as ddgs:
        return [img["image"] for img in ddgs.images(query, max_results=max_images)]
def generate_apa_citation(title, url, source=""):
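    """Build a lightweight APA-style citation string. Note: the current year is used in
    place of the real publication date, which the source helpers above do not fetch.
    """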
    current_year = datetime.datetime.now().year
    if source == "arxiv":
        return f"{title}. ({current_year}). *arXiv*. {url}"
    elif source == "semantic":
        return f"{title}. ({current_year}). *Semantic Scholar*. {url}"
    elif source == "web":
        return f"{title}. ({current_year}). *Web Source*. {url}"
    else:
        return f"{title}. ({current_year}). {url}"
# --- Research Agent ---
def autonomous_research_agent(topic):
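    """Gather sources from arXiv, Semantic Scholar, and DuckDuckGo, ask the LLM to
    synthesize them into a research article, then append source lists and APA citations.
    Returns (markdown_report, image_urls).
    """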
    arxiv = get_arxiv_papers(topic)
    scholar = get_semantic_scholar_papers(topic)
    web = search_duckduckgo(topic)
    images = get_image_urls(topic)

    arxiv_md, arxiv_citations = "", []
    for p in arxiv:
        arxiv_md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
        arxiv_citations.append(generate_apa_citation(p["title"], p["url"], source="arxiv"))

    scholar_md, scholar_citations = "", []
    for p in scholar:
        scholar_md += f"- [{p['title']}]({p['url']})\n> {p['summary'][:300]}...\n\n"
        scholar_citations.append(generate_apa_citation(p["title"], p["url"], source="semantic"))

    web_md, web_citations = "", []
    for w in web:
        web_md += f"- [{w['title']}]({w['url']})\n> {w['snippet']}\n\n"
        web_citations.append(generate_apa_citation(w["title"], w["url"], source="web"))

    prompt = f"""
# Research Topic: {topic}
## ArXiv:
{arxiv_md}
## Semantic Scholar:
{scholar_md}
## Web Insights:
{web_md}
Now synthesize this information into:
1. A research gap
2. A novel research direction
3. A full markdown-formatted research article (continuous, no section labels, academic tone)
"""
    response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)

    # Append Sources
    response += "\n\n---\n### Sources Cited\n"
    if arxiv_md:
        response += "**ArXiv:**\n" + arxiv_md
    if scholar_md:
        response += "**Semantic Scholar:**\n" + scholar_md
    if web_md:
        response += "**Web:**\n" + web_md

    # APA Citations Section
    all_citations = arxiv_citations + scholar_citations + web_citations
    response += "\n---\n### πŸ“š APA Citations\n"
    for cite in all_citations:
        response += f"- {cite}\n"
    return response, images
# --- Streamlit UI ---
st.set_page_config("Autonomous Research Assistant", layout="wide")
st.title("πŸ€– Autonomous AI Research Assistant")
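# Streamlit re-runs this script on every interaction, so the conversation is kept in
# session_state to give follow-up questions their full context.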
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
topic = st.text_input("Enter a research topic:")
if st.button("Run Research Agent"):
    with st.spinner("Gathering sources & thinking..."):
        try:
            response, images = autonomous_research_agent(topic)

            # Display images
            if images:
                st.subheader("πŸ–ΌοΈ Relevant Images")
                st.image(images, width=300)

            # Display markdown response
            st.session_state.chat_history.append({"role": "user", "content": topic})
            st.session_state.chat_history.append({"role": "assistant", "content": response})
            st.markdown(response)
        except Exception as e:
            st.error(f"Failed: {e}")
# --- Follow-up Chat ---
st.divider()
st.subheader("πŸ’¬ Follow-up Q&A")
followup = st.text_input("Ask a follow-up question:")
if st.button("Ask"):
    if followup:
        try:
            chat = st.session_state.chat_history + [{"role": "user", "content": followup}]
            answer = call_llm(chat, max_tokens=1500)
            st.session_state.chat_history.append({"role": "user", "content": followup})
            st.session_state.chat_history.append({"role": "assistant", "content": answer})
            st.markdown(answer)
        except Exception as e:
            st.error(f"Follow-up error: {e}")