File size: 6,412 Bytes
2d473c3
 
06e1e90
 
8040d2d
06e1e90
8271c6c
2d473c3
 
8271c6c
06e1e90
f7fc82d
06e1e90
 
 
 
 
8271c6c
2d473c3
06e1e90
 
 
 
 
 
ff83913
 
 
 
8271c6c
ff83913
f7fc82d
8271c6c
 
 
06e1e90
82bf609
8271c6c
 
 
 
 
82bf609
 
8271c6c
 
ff83913
8271c6c
 
 
 
82bf609
8271c6c
82bf609
 
8271c6c
82bf609
8271c6c
 
 
 
82bf609
 
 
8271c6c
 
f7fc82d
8271c6c
f7fc82d
8271c6c
8040d2d
 
 
 
 
 
 
 
 
 
 
 
82bf609
8271c6c
 
 
 
f7fc82d
8271c6c
8040d2d
8271c6c
f7fc82d
8040d2d
8271c6c
8040d2d
8271c6c
f7fc82d
8040d2d
8271c6c
8040d2d
8271c6c
f7fc82d
8040d2d
f7fc82d
 
 
 
 
 
 
 
 
8271c6c
f7fc82d
 
8271c6c
f7fc82d
 
 
 
 
82bf609
f7fc82d
8271c6c
f7fc82d
 
 
 
 
 
 
 
 
8040d2d
 
 
 
 
 
bc0e336
f7fc82d
 
 
06e1e90
 
 
 
 
 
f7fc82d
8271c6c
06e1e90
bc0e336
 
 
 
 
 
 
 
06e1e90
bc0e336
 
06e1e90
8271c6c
06e1e90
82bf609
06e1e90
f7fc82d
 
8271c6c
06e1e90
f7fc82d
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import os
import streamlit as st
import requests
import feedparser
import datetime
from dotenv import load_dotenv
from duckduckgo_search import DDGS

# Load environment variables from a local .env file so the API key
# never has to be hard-coded in source control.
load_dotenv()
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")  # None if the variable is unset

# --- Call OpenRouter LLM ---
def call_llm(messages, model="deepseek/deepseek-chat-v3-0324:free", max_tokens=2048, temperature=0.7):
    """Send a chat-completion request to OpenRouter and return the reply text.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` chat messages.
        model: OpenRouter model identifier.
        max_tokens: Completion token budget.
        temperature: Sampling temperature.

    Returns:
        The assistant message content as a string.

    Raises:
        RuntimeError: On network failure, a non-JSON body, a non-200 status,
            or a payload without ``choices``.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "X-Title": "Autonomous Research Assistant"
    }
    data = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }
    try:
        # Timeout keeps the Streamlit worker from hanging forever on a stalled request.
        response = requests.post(url, headers=headers, json=data, timeout=120)
    except requests.RequestException as e:
        raise RuntimeError(f"Failed to connect or parse response: {e}")
    try:
        result = response.json()
    except ValueError as e:
        # Non-JSON body, e.g. an HTML error page from a proxy or gateway.
        raise RuntimeError(f"Failed to connect or parse response: {e}")
    if response.status_code != 200:
        raise RuntimeError(result.get("error", {}).get("message", "LLM API error"))
    if "choices" not in result:
        raise RuntimeError(f"Invalid response: {result}")
    return result["choices"][0]["message"]["content"]

# --- Source Utilities ---
def get_arxiv_papers(query, max_results=3):
    """Fetch arXiv papers matching *query* via the arXiv Atom API.

    Returns a list of dicts with "title", "summary", and "url" (the PDF
    link, or "" when no PDF link is present).
    """
    from urllib.parse import quote_plus
    # quote_plus keeps spaces/special characters from breaking the query string.
    url = (
        "http://export.arxiv.org/api/query"
        f"?search_query=all:{quote_plus(query)}&start=0&max_results={max_results}"
    )
    feed = feedparser.parse(url)
    papers = []
    for entry in feed.entries:
        # feedparser entries/links may lack fields entirely, so use getattr
        # defaults instead of attribute access that can raise AttributeError.
        title = getattr(entry, "title", "") or "Untitled"
        summary = (getattr(entry, "summary", "") or "No summary available").replace("\n", " ").strip()
        pdf_url = next(
            (link.href for link in getattr(entry, "links", [])
             if getattr(link, "type", "") == "application/pdf"),
            "",
        )
        papers.append({"title": title, "summary": summary, "url": pdf_url})
    return papers

def get_semantic_scholar_papers(query, max_results=3):
    """Search the Semantic Scholar Graph API for papers matching *query*.

    Returns a list of dicts with "title", "summary", and "url".

    Raises:
        requests.HTTPError: On a non-2xx response (e.g. rate limiting).
    """
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {"query": query, "limit": max_results, "fields": "title,abstract,url"}
    response = requests.get(url, params=params, timeout=30)
    # Surface rate-limit / server errors instead of silently returning []
    # or raising a confusing JSON decode error.
    response.raise_for_status()
    papers = response.json().get("data", [])
    return [{
        "title": p.get("title") or "Untitled",
        "summary": (p.get("abstract") or "No abstract available").strip(),
        "url": p.get("url", "")
    } for p in papers]

def search_duckduckgo(query, max_results=3):
    """Run a DuckDuckGo text search for *query*.

    Returns a list of dicts with "title", "snippet", and "url".
    """
    with DDGS() as ddgs:
        # .get() guards against result entries missing keys, which the
        # backend occasionally omits.
        return [{
            "title": r.get("title") or "Untitled",
            "snippet": r.get("body") or "",
            "url": r.get("href") or ""
        } for r in ddgs.text(query, max_results=max_results)]

def get_image_urls(query, max_images=3):
    """Return up to *max_images* image URLs for *query* from DuckDuckGo."""
    urls = []
    with DDGS() as ddgs:
        for hit in ddgs.images(query, max_results=max_images):
            urls.append(hit["image"])
    return urls

def generate_apa_citation(title, url, source=""):
    """Build a lightweight APA-style citation string.

    The true publication year is unknown here, so the current year is used
    as a stand-in. *source* selects the venue label ("arxiv", "semantic",
    or "web"); any other value omits the venue entirely.
    """
    year = datetime.datetime.now().year
    venue_labels = {
        "arxiv": "*arXiv*. ",
        "semantic": "*Semantic Scholar*. ",
        "web": "*Web Source*. ",
    }
    venue = venue_labels.get(source, "")
    return f"{title}. ({year}). {venue}{url}"


# --- Research Agent ---
def _render_sources(entries, text_key, source, truncate=None):
    """Render source entries as markdown bullets and collect APA citations.

    Each entry must carry "title", "url", and *text_key*. When *truncate*
    is set, the text is cut to that many characters and suffixed with "...".
    Returns (markdown_string, citation_list).
    """
    md, citations = "", []
    for entry in entries:
        text = entry[text_key]
        if truncate is not None:
            text = f"{text[:truncate]}..."
        md += f"- [{entry['title']}]({entry['url']})\n> {text}\n\n"
        citations.append(generate_apa_citation(entry["title"], entry["url"], source=source))
    return md, citations


def autonomous_research_agent(topic):
    """Gather sources on *topic*, ask the LLM to synthesize them, and return
    ``(markdown_report, image_urls)``.

    The report is the LLM synthesis followed by a "Sources Cited" section
    and an APA citation list.
    """
    arxiv = get_arxiv_papers(topic)
    scholar = get_semantic_scholar_papers(topic)
    web = search_duckduckgo(topic)
    images = get_image_urls(topic)

    # Each source list becomes (markdown bullet list, APA citations).
    arxiv_md, arxiv_citations = _render_sources(arxiv, "summary", "arxiv", truncate=300)
    scholar_md, scholar_citations = _render_sources(scholar, "summary", "semantic", truncate=300)
    web_md, web_citations = _render_sources(web, "snippet", "web")

    prompt = f"""
# Research Topic: {topic}

## ArXiv:
{arxiv_md}

## Semantic Scholar:
{scholar_md}

## Web Insights:
{web_md}

Now synthesize this information into:
1. A research gap
2. A novel research direction
3. A full markdown-formatted research article (continuous, no section labels, academic tone)
"""

    response = call_llm([{"role": "user", "content": prompt}], max_tokens=3000)

    # Append the raw source listings so readers can verify the synthesis.
    response += "\n\n---\n### Sources Cited\n"
    if arxiv_md:
        response += "**ArXiv:**\n" + arxiv_md
    if scholar_md:
        response += "**Semantic Scholar:**\n" + scholar_md
    if web_md:
        response += "**Web:**\n" + web_md

    # APA citations section. (Header emoji was mojibake in the original;
    # restored to the intended UTF-8 character.)
    all_citations = arxiv_citations + scholar_citations + web_citations
    response += "\n---\n### 📚 APA Citations\n"
    for cite in all_citations:
        response += f"- {cite}\n"

    return response, images

# --- Streamlit UI ---
st.set_page_config("Autonomous Research Assistant", layout="wide")
# Title emoji was mojibake in the original source; restored to UTF-8.
st.title("🤖 Autonomous AI Research Assistant")

# Chat history must live in session_state to survive Streamlit reruns.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

topic = st.text_input("Enter a research topic:")
if st.button("Run Research Agent"):
    if not topic.strip():
        # Mirror the follow-up section's guard: don't run the whole agent
        # pipeline on an empty query.
        st.warning("Please enter a research topic first.")
    else:
        with st.spinner("Gathering sources & thinking..."):
            try:
                response, images = autonomous_research_agent(topic)

                # Display images found for the topic, if any.
                if images:
                    st.subheader("🖼️ Relevant Images")
                    st.image(images, width=300)

                # Record the exchange so follow-up questions have context.
                st.session_state.chat_history.append({"role": "user", "content": topic})
                st.session_state.chat_history.append({"role": "assistant", "content": response})
                st.markdown(response)
            except Exception as e:
                st.error(f"Failed: {e}")

# --- Follow-up Chat ---
st.divider()
st.subheader("💬 Follow-up Q&A")
followup = st.text_input("Ask a follow-up question:")
if st.button("Ask"):
    if followup:
        try:
            # Send the stored history so the LLM answers in context.
            chat = st.session_state.chat_history + [{"role": "user", "content": followup}]
            answer = call_llm(chat, max_tokens=1500)
            st.session_state.chat_history.append({"role": "user", "content": followup})
            st.session_state.chat_history.append({"role": "assistant", "content": answer})
            st.markdown(answer)
        except Exception as e:
            st.error(f"Follow-up error: {e}")