Spaces:
Sleeping
Sleeping
File size: 7,525 Bytes
2d473c3 06e1e90 341824e b664a70 06e1e90 8271c6c 2d473c3 8271c6c 06e1e90 f7fc82d 06e1e90 8271c6c 2d473c3 06e1e90 ff83913 8271c6c ff83913 f7fc82d 8271c6c 06e1e90 b664a70 82bf609 8271c6c 82bf609 8271c6c ff83913 8271c6c 82bf609 8271c6c 82bf609 8271c6c 82bf609 8271c6c 82bf609 8271c6c f7fc82d 8271c6c f7fc82d 8271c6c 341824e 82bf609 8271c6c f7fc82d 8271c6c 341824e 8271c6c f7fc82d 341824e 8271c6c 341824e 8271c6c f7fc82d 341824e 8271c6c 341824e 8271c6c f7fc82d 341824e f7fc82d 8271c6c f7fc82d 8271c6c f7fc82d 82bf609 f7fc82d 8271c6c f7fc82d 341824e bc0e336 f7fc82d 06e1e90 f7fc82d 8271c6c 06e1e90 bc0e336 06e1e90 bc0e336 b664a70 06e1e90 8271c6c 06e1e90 82bf609 06e1e90 f7fc82d 8271c6c 06e1e90 f7fc82d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
import os
import streamlit as st
import requests
import feedparser
import datetime
from fuzzywuzzy import fuzz
from dotenv import load_dotenv
from duckduckgo_search import DDGS
# Load variables from a local .env file (if present) so the OpenRouter key
# can be supplied without exporting it in the shell.
load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")  # may be None if unset; requests will then fail with 401
# --- Call OpenRouter LLM ---
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7, timeout=60):
    """Send a chat-completion request to OpenRouter and return the reply text.

    Args:
        messages: List of ``{"role", "content"}`` chat messages.
        model: OpenRouter model identifier.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        timeout: Seconds to wait for the HTTP round-trip (new parameter,
            defaults to 60 so existing callers are unaffected).

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        RuntimeError: On connection failure/timeout, a non-200 status, or a
            response body without a ``choices`` entry.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "X-Title": "Autonomous Research Assistant"
    }
    data = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }
    try:
        # A timeout keeps a stalled request from hanging the Streamlit worker forever.
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        result = response.json()
    except Exception as e:
        raise RuntimeError(f"Failed to connect or parse response: {e}")
    if response.status_code != 200:
        # OpenRouter error bodies carry {"error": {"message": ...}}.
        raise RuntimeError(result.get("error", {}).get("message", "LLM API error"))
    if "choices" not in result:
        raise RuntimeError(f"Invalid response: {result}")
    return result["choices"][0]["message"]["content"]
def check_plagiarism(text, query, threshold=70):
    """Fuzzy-compare *text* against top web snippets for *query*.

    Args:
        text: The generated text to screen for overlap.
        query: Search query used to fetch candidate web sources.
        threshold: Minimum ``token_set_ratio`` (0-100) to report a match.

    Returns:
        A list of dicts with ``title``, ``url``, ``snippet`` and
        ``similarity`` for every result at or above *threshold*.
    """
    web_results = search_duckduckgo(query, max_results=5)
    plagiarized_snippets = []
    for result in web_results:
        snippet = result.get("snippet", "")
        similarity = fuzz.token_set_ratio(text, snippet)
        if similarity >= threshold:
            plagiarized_snippets.append({
                # .get() mirrors the snippet lookup above so a result
                # missing a key cannot abort the whole scan with KeyError.
                "title": result.get("title", "Untitled"),
                "url": result.get("url", ""),
                "snippet": snippet,
                "similarity": similarity
            })
    return plagiarized_snippets
# --- Source Utilities ---
def get_arxiv_papers(query, max_results=3):
    """Query the arXiv Atom API and return up to *max_results* papers.

    Returns:
        A list of dicts with ``title``, single-line ``summary`` and the
        PDF ``url`` (empty string when no PDF link is present).
    """
    from urllib.parse import quote_plus
    url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results={max_results}"
    feed = feedparser.parse(url)
    # feedparser entries are dict-like; .get()/getattr() avoid AttributeError
    # on sparse entries that lack a title, summary, links, or link type.
    return [{
        "title": e.get("title") or "Untitled",
        "summary": (e.get("summary") or "No summary available").replace("\n", " ").strip(),
        "url": next((l.href for l in e.get("links", []) if getattr(l, "type", "") == "application/pdf"), "")
    } for e in feed.entries]
def get_semantic_scholar_papers(query, max_results=3):
    """Query the Semantic Scholar Graph API and return up to *max_results* papers.

    Returns:
        A list of dicts with ``title``, ``summary`` (the abstract) and ``url``.
        An error/empty API response yields an empty list rather than raising.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": max_results, "fields": "title,abstract,url"}
    # A timeout keeps a slow/unresponsive API from blocking the whole app.
    response = requests.get(url, params=params, timeout=30)
    # "data" may be absent or null on error bodies; `or []` covers both.
    papers = response.json().get("data") or []
    return [{
        "title": p.get("title") or "Untitled",
        "summary": (p.get("abstract") or "No abstract available").strip(),
        "url": p.get("url", "")
    } for p in papers]
def search_duckduckgo(query, max_results=3):
    """Run a DuckDuckGo text search and normalize results.

    Returns:
        A list of dicts with ``title``, ``snippet`` (result body) and ``url``.
    """
    with DDGS() as ddgs:
        # .get() guards against results missing a key, which would
        # otherwise raise KeyError and kill the whole search.
        return [{
            "title": r.get("title") or "Untitled",
            "snippet": r.get("body") or "",
            "url": r.get("href") or ""
        } for r in ddgs.text(query, max_results=max_results)]
def get_image_urls(query, max_images=3):
    """Return up to *max_images* image URLs for *query* via DuckDuckGo image search."""
    urls = []
    with DDGS() as ddgs:
        for hit in ddgs.images(query, max_results=max_images):
            urls.append(hit["image"])
    return urls
def generate_apa_citation(title, url, source=""):
    """Build a lightweight APA-style citation string.

    The year is always the current year (the upstream APIs used here do
    not expose a publication date), and the venue label is chosen by the
    *source* tag; an unknown tag yields a citation with no venue.
    """
    year = datetime.datetime.now().year
    venue_labels = {
        "arxiv": "*arXiv*. ",
        "semantic": "*Semantic Scholar*. ",
        "web": "*Web Source*. ",
    }
    venue = venue_labels.get(source, "")
    return f"{title}. ({year}). {venue}{url}"
# --- Research Agent ---
def _format_sources(entries, source, text_key, truncate=None):
    """Render source entries as markdown bullets plus matching APA citations.

    Args:
        entries: List of dicts with ``title``, ``url`` and *text_key*.
        source: Tag forwarded to ``generate_apa_citation`` ("arxiv"/"semantic"/"web").
        text_key: Field holding the quoted body text ("summary" or "snippet").
        truncate: When set, cap the quoted text at this many characters and
            append an ellipsis (matches the original per-source formatting).

    Returns:
        ``(markdown, citations)`` — a markdown string and a list of citations.
    """
    md, citations = "", []
    for entry in entries:
        text = entry[text_key]
        if truncate is not None:
            text = f"{text[:truncate]}..."
        md += f"- [{entry['title']}]({entry['url']})\n> {text}\n\n"
        citations.append(generate_apa_citation(entry["title"], entry["url"], source=source))
    return md, citations


def autonomous_research_agent(topic):
    """Gather sources on *topic*, have the LLM synthesize them, and return
    ``(markdown_report, image_urls)``.

    The report ends with a "Sources Cited" section and an APA citation list
    built from the same source metadata that was fed to the model.
    """
    arxiv = get_arxiv_papers(topic)
    scholar = get_semantic_scholar_papers(topic)
    web = search_duckduckgo(topic)
    images = get_image_urls(topic)
    # One formatting pass per source family (was three copy-pasted loops).
    arxiv_md, arxiv_citations = _format_sources(arxiv, "arxiv", "summary", truncate=300)
    scholar_md, scholar_citations = _format_sources(scholar, "semantic", "summary", truncate=300)
    web_md, web_citations = _format_sources(web, "web", "snippet")
    prompt = f"""
# Research Topic: {topic}
## ArXiv:
{arxiv_md}
## Semantic Scholar:
{scholar_md}
## Web Insights:
{web_md}
Now synthesize this information into:
1. A research gap
2. A novel research direction
3. A full markdown-formatted research article (continuous, no section labels, academic tone)
"""
    response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)
    # Append the raw source listings so readers can audit the synthesis.
    response += "\n\n---\n### Sources Cited\n"
    if arxiv_md:
        response += "**ArXiv:**\n" + arxiv_md
    if scholar_md:
        response += "**Semantic Scholar:**\n" + scholar_md
    if web_md:
        response += "**Web:**\n" + web_md
    # APA Citations Section
    all_citations = arxiv_citations + scholar_citations + web_citations
    response += "\n---\n### ๐ APA Citations\n"
    for cite in all_citations:
        response += f"- {cite}\n"
    return response, images
# --- Streamlit UI ---
st.set_page_config("Autonomous Research Assistant", layout="wide")
st.title("๐ค Autonomous AI Research Assistant")
# chat_history persists across Streamlit reruns so the follow-up Q&A
# section below can send the prior conversation to the LLM.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
topic = st.text_input("Enter a research topic:")
if st.button("Run Research Agent"):
    if not topic.strip():
        # Guard: without this, an empty textbox would run the full agent
        # (four API calls plus an LLM request) on an empty query.
        st.warning("Please enter a research topic first.")
    else:
        with st.spinner("Gathering sources & thinking..."):
            try:
                response, images = autonomous_research_agent(topic)
                # Display images
                if images:
                    st.subheader("๐ผ๏ธ Relevant Images")
                    st.image(images, width=300)
                # Record the exchange, then render the markdown report
                st.session_state.chat_history.append({"role": "user", "content": topic})
                st.session_state.chat_history.append({"role": "assistant", "content": response})
                st.markdown(response)
                # Check for plagiarism (optional feature)
                plagiarism_hits = check_plagiarism(response, topic)
                if plagiarism_hits:
                    st.warning("โ ๏ธ Potential overlap with existing web content detected.")
                    st.subheader("๐ต๏ธ Plagiarism Check Results")
                    for hit in plagiarism_hits:
                        st.markdown(f"**{hit['title']}** - [{hit['url']}]({hit['url']})")
                        st.markdown(f"> _Similarity: {hit['similarity']}%_\n\n{hit['snippet']}")
                else:
                    # NOTE(review): this literal was split across two lines in the
                    # source dump (a syntax error as dumped); rejoined here.
                    st.success("โ No significant overlaps found. Content appears original.")
            except Exception as e:
                st.error(f"Failed: {e}")
# --- Follow-up Chat ---
st.divider()
st.subheader("๐ฌ Follow-up Q&A")
followup = st.text_input("Ask a follow-up question:")
# Both conditions in one guard: the button must still render every rerun,
# and short-circuiting skips the LLM call when the box is empty.
if st.button("Ask") and followup:
    try:
        # Send the prior conversation plus the new question for context.
        pending = st.session_state.chat_history + [{"role": "user", "content": followup}]
        answer = call_llm(pending, max_tokens=1500)
        st.session_state.chat_history.extend([
            {"role": "user", "content": followup},
            {"role": "assistant", "content": answer},
        ])
        st.markdown(answer)
    except Exception as e:
        st.error(f"Follow-up error: {e}")