Spaces:
Sleeping
Sleeping
File size: 7,525 Bytes
2d473c3 06e1e90 341824e b664a70 06e1e90 8271c6c 2d473c3 8271c6c 06e1e90 f7fc82d 06e1e90 8271c6c 2d473c3 06e1e90 ff83913 8271c6c ff83913 f7fc82d 8271c6c 06e1e90 b664a70 82bf609 8271c6c 82bf609 8271c6c ff83913 8271c6c 82bf609 8271c6c 82bf609 8271c6c 82bf609 8271c6c 82bf609 8271c6c f7fc82d 8271c6c f7fc82d 8271c6c 341824e 82bf609 8271c6c f7fc82d 8271c6c 341824e 8271c6c f7fc82d 341824e 8271c6c 341824e 8271c6c f7fc82d 341824e 8271c6c 341824e 8271c6c f7fc82d 341824e f7fc82d 8271c6c f7fc82d 8271c6c f7fc82d 82bf609 f7fc82d 8271c6c f7fc82d 341824e bc0e336 f7fc82d 06e1e90 f7fc82d 8271c6c 06e1e90 bc0e336 06e1e90 bc0e336 b664a70 06e1e90 8271c6c 06e1e90 82bf609 06e1e90 f7fc82d 8271c6c 06e1e90 f7fc82d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
import os
import streamlit as st
import requests
import feedparser
import datetime
from fuzzywuzzy import fuzz
from dotenv import load_dotenv
from duckduckgo_search import DDGS
# Load variables from a local .env file (if present) so the OpenRouter key
# can be supplied without exporting it in the shell.
load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")  # may be None if unset; requests will then fail with 401
# --- Call OpenRouter LLM ---
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7, timeout=60):
    """Send a chat-completion request to OpenRouter and return the reply text.

    Args:
        messages: List of ``{"role", "content"}`` chat messages.
        model: OpenRouter model identifier.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.
        timeout: Seconds to wait for the HTTP round-trip (new parameter,
            defaults to 60 so existing callers are unaffected).

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        RuntimeError: On connection failure/timeout, a non-200 status, or a
            response body without a ``choices`` entry.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "X-Title": "Autonomous Research Assistant"
    }
    data = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }
    try:
        # A timeout keeps a stalled request from hanging the Streamlit worker forever.
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
        result = response.json()
    except Exception as e:
        raise RuntimeError(f"Failed to connect or parse response: {e}")
    if response.status_code != 200:
        # OpenRouter error bodies carry {"error": {"message": ...}}.
        raise RuntimeError(result.get("error", {}).get("message", "LLM API error"))
    if "choices" not in result:
        raise RuntimeError(f"Invalid response: {result}")
    return result["choices"][0]["message"]["content"]
def check_plagiarism(text, query, threshold=70):
    """Fuzzy-compare *text* against top web snippets for *query*.

    Args:
        text: The generated text to screen for overlap.
        query: Search query used to fetch candidate web sources.
        threshold: Minimum ``token_set_ratio`` (0-100) to report a match.

    Returns:
        A list of dicts with ``title``, ``url``, ``snippet`` and
        ``similarity`` for every result at or above *threshold*.
    """
    web_results = search_duckduckgo(query, max_results=5)
    plagiarized_snippets = []
    for result in web_results:
        snippet = result.get("snippet", "")
        similarity = fuzz.token_set_ratio(text, snippet)
        if similarity >= threshold:
            plagiarized_snippets.append({
                # .get() mirrors the snippet lookup above so a result
                # missing a key cannot abort the whole scan with KeyError.
                "title": result.get("title", "Untitled"),
                "url": result.get("url", ""),
                "snippet": snippet,
                "similarity": similarity
            })
    return plagiarized_snippets
# --- Source Utilities ---
def get_arxiv_papers(query, max_results=3):
    """Query the arXiv Atom API and return up to *max_results* papers.

    Returns:
        A list of dicts with ``title``, single-line ``summary`` and the
        PDF ``url`` (empty string when no PDF link is present).
    """
    from urllib.parse import quote_plus
    url = f"http://export.arxiv.org/api/query?search_query=all:{quote_plus(query)}&start=0&max_results={max_results}"
    feed = feedparser.parse(url)
    # feedparser entries are dict-like; .get()/getattr() avoid AttributeError
    # on sparse entries that lack a title, summary, links, or link type.
    return [{
        "title": e.get("title") or "Untitled",
        "summary": (e.get("summary") or "No summary available").replace("\n", " ").strip(),
        "url": next((l.href for l in e.get("links", []) if getattr(l, "type", "") == "application/pdf"), "")
    } for e in feed.entries]
def get_semantic_scholar_papers(query, max_results=3):
    """Query the Semantic Scholar Graph API and return up to *max_results* papers.

    Returns:
        A list of dicts with ``title``, ``summary`` (the abstract) and ``url``.
        An error/empty API response yields an empty list rather than raising.
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": max_results, "fields": "title,abstract,url"}
    # A timeout keeps a slow/unresponsive API from blocking the whole app.
    response = requests.get(url, params=params, timeout=30)
    # "data" may be absent or null on error bodies; `or []` covers both.
    papers = response.json().get("data") or []
    return [{
        "title": p.get("title") or "Untitled",
        "summary": (p.get("abstract") or "No abstract available").strip(),
        "url": p.get("url", "")
    } for p in papers]
def search_duckduckgo(query, max_results=3):
    """Run a DuckDuckGo text search and normalize results.

    Returns:
        A list of dicts with ``title``, ``snippet`` (result body) and ``url``.
    """
    with DDGS() as ddgs:
        # .get() guards against results missing a key, which would
        # otherwise raise KeyError and kill the whole search.
        return [{
            "title": r.get("title") or "Untitled",
            "snippet": r.get("body") or "",
            "url": r.get("href") or ""
        } for r in ddgs.text(query, max_results=max_results)]
def get_image_urls(query, max_images=3):
    """Return up to *max_images* image URLs for *query* via DuckDuckGo image search."""
    urls = []
    with DDGS() as ddgs:
        for hit in ddgs.images(query, max_results=max_images):
            urls.append(hit["image"])
    return urls
def generate_apa_citation(title, url, source=""):
    """Build a lightweight APA-style citation string.

    The year is always the current year (the upstream APIs used here do
    not expose a publication date), and the venue label is chosen by the
    *source* tag; an unknown tag yields a citation with no venue.
    """
    year = datetime.datetime.now().year
    venue_labels = {
        "arxiv": "*arXiv*. ",
        "semantic": "*Semantic Scholar*. ",
        "web": "*Web Source*. ",
    }
    venue = venue_labels.get(source, "")
    return f"{title}. ({year}). {venue}{url}"
# --- Research Agent ---
def _format_sources(entries, source, text_key, truncate=None):
    """Render source entries as markdown bullets plus matching APA citations.

    Args:
        entries: List of dicts with ``title``, ``url`` and *text_key*.
        source: Tag forwarded to ``generate_apa_citation`` ("arxiv"/"semantic"/"web").
        text_key: Field holding the quoted body text ("summary" or "snippet").
        truncate: When set, cap the quoted text at this many characters and
            append an ellipsis (matches the original per-source formatting).

    Returns:
        ``(markdown, citations)`` — a markdown string and a list of citations.
    """
    md, citations = "", []
    for entry in entries:
        text = entry[text_key]
        if truncate is not None:
            text = f"{text[:truncate]}..."
        md += f"- [{entry['title']}]({entry['url']})\n> {text}\n\n"
        citations.append(generate_apa_citation(entry["title"], entry["url"], source=source))
    return md, citations


def autonomous_research_agent(topic):
    """Gather sources on *topic*, have the LLM synthesize them, and return
    ``(markdown_report, image_urls)``.

    The report ends with a "Sources Cited" section and an APA citation list
    built from the same source metadata that was fed to the model.
    """
    arxiv = get_arxiv_papers(topic)
    scholar = get_semantic_scholar_papers(topic)
    web = search_duckduckgo(topic)
    images = get_image_urls(topic)
    # One formatting pass per source family (was three copy-pasted loops).
    arxiv_md, arxiv_citations = _format_sources(arxiv, "arxiv", "summary", truncate=300)
    scholar_md, scholar_citations = _format_sources(scholar, "semantic", "summary", truncate=300)
    web_md, web_citations = _format_sources(web, "web", "snippet")
    prompt = f"""
# Research Topic: {topic}
## ArXiv:
{arxiv_md}
## Semantic Scholar:
{scholar_md}
## Web Insights:
{web_md}
Now synthesize this information into:
1. A research gap
2. A novel research direction
3. A full markdown-formatted research article (continuous, no section labels, academic tone)
"""
    response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)
    # Append the raw source listings so readers can audit the synthesis.
    response += "\n\n---\n### Sources Cited\n"
    if arxiv_md:
        response += "**ArXiv:**\n" + arxiv_md
    if scholar_md:
        response += "**Semantic Scholar:**\n" + scholar_md
    if web_md:
        response += "**Web:**\n" + web_md
    # APA Citations Section
    all_citations = arxiv_citations + scholar_citations + web_citations
    response += "\n---\n### ๐ APA Citations\n"
    for cite in all_citations:
        response += f"- {cite}\n"
    return response, images
# --- Streamlit UI ---
st.set_page_config("Autonomous Research Assistant", layout="wide")
st.title("๐ค Autonomous AI Research Assistant")
# chat_history persists across Streamlit reruns so the follow-up Q&A
# section below can send the prior conversation to the LLM.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
topic = st.text_input("Enter a research topic:")
if st.button("Run Research Agent"):
    if not topic.strip():
        # Guard: without this, an empty textbox would run the full agent
        # (four API calls plus an LLM request) on an empty query.
        st.warning("Please enter a research topic first.")
    else:
        with st.spinner("Gathering sources & thinking..."):
            try:
                response, images = autonomous_research_agent(topic)
                # Display images
                if images:
                    st.subheader("๐ผ๏ธ Relevant Images")
                    st.image(images, width=300)
                # Record the exchange, then render the markdown report
                st.session_state.chat_history.append({"role": "user", "content": topic})
                st.session_state.chat_history.append({"role": "assistant", "content": response})
                st.markdown(response)
                # Check for plagiarism (optional feature)
                plagiarism_hits = check_plagiarism(response, topic)
                if plagiarism_hits:
                    st.warning("โ ๏ธ Potential overlap with existing web content detected.")
                    st.subheader("๐ต๏ธ Plagiarism Check Results")
                    for hit in plagiarism_hits:
                        st.markdown(f"**{hit['title']}** - [{hit['url']}]({hit['url']})")
                        st.markdown(f"> _Similarity: {hit['similarity']}%_\n\n{hit['snippet']}")
                else:
                    # NOTE(review): this literal was split across two lines in the
                    # source dump (a syntax error as dumped); rejoined here.
                    st.success("โ No significant overlaps found. Content appears original.")
            except Exception as e:
                st.error(f"Failed: {e}")
# --- Follow-up Chat ---
st.divider()
st.subheader("๐ฌ Follow-up Q&A")
followup = st.text_input("Ask a follow-up question:")
# Both conditions in one guard: the button must still render every rerun,
# and short-circuiting skips the LLM call when the box is empty.
if st.button("Ask") and followup:
    try:
        # Send the prior conversation plus the new question for context.
        pending = st.session_state.chat_history + [{"role": "user", "content": followup}]
        answer = call_llm(pending, max_tokens=1500)
        st.session_state.chat_history.extend([
            {"role": "user", "content": followup},
            {"role": "assistant", "content": answer},
        ])
        st.markdown(answer)
    except Exception as e:
        st.error(f"Follow-up error: {e}")