File size: 3,955 Bytes
a5b7b6e
 
 
 
 
 
86310c5
44d5035
a5b7b6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44d5035
a5b7b6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# app.py — AI Research Assistant (Hugging Face-ready)

import os
import arxiv
import gradio as gr
from tavily import TavilyClient
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage

# ====== Environment Setup ======
# Hugging Face injects secrets as environment variables
# (configured under Repo > Settings > Secrets).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

# Fail fast at import time: a missing or empty key stops the app with a
# message naming the secret, instead of failing later on the first request.
if not OPENAI_API_KEY:
    raise ValueError("❌ Missing OPENAI_API_KEY. Add it in Hugging Face → Settings → Secrets.")
if not TAVILY_API_KEY:
    raise ValueError("❌ Missing TAVILY_API_KEY. Add it in Hugging Face → Settings → Secrets.")

# ====== Initialize Clients ======
# Module-level singletons shared by the search and summarization functions.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2, api_key=OPENAI_API_KEY)
tavily = TavilyClient(api_key=TAVILY_API_KEY)

# ====== Search Functions ======
def search_web_tavily(query: str, k: int = 5):
    """Search the web through the Tavily API.

    Args:
        query: Search text forwarded to ``tavily.search``.
        k: Maximum number of results to request and to return.

    Returns:
        A list of strings formatted as ``[W<i>] <title> (<url>)``, numbered
        from 1 in the order the API returned the results.
    """
    res = tavily.search(query=query, max_results=k)
    # ``or []`` also covers a response whose "results" key is present but null.
    hits = (res.get("results") or [])[:k]
    # Use placeholders rather than raising KeyError when a hit lacks a field,
    # so a single malformed result cannot abort the whole search.
    return [
        f"[W{i}] {hit.get('title') or 'Untitled'} ({hit.get('url') or 'no URL'})"
        for i, hit in enumerate(hits, start=1)
    ]

def search_arxiv(query: str, k: int = 5):
    """Search arXiv for papers matching *query*.

    Args:
        query: Search text passed to ``arxiv.Search``.
        k: Maximum number of papers to request.

    Returns:
        A list of strings formatted as
        ``[A<i>] <title> — Published: <YYYY-MM-DD> — <pdf_url>``, numbered
        from 1 in the order the API yielded the papers.
    """
    search = arxiv.Search(query=query, max_results=k)
    # ``Search.results()`` is deprecated in arxiv.py; fetching through a
    # Client is the supported way to iterate over the results.
    client = arxiv.Client()
    papers = []
    for i, paper in enumerate(client.results(search), start=1):
        # The publication date is best-effort: a missing or unformattable
        # value must not drop the paper from the list.
        try:
            date = paper.published.strftime("%Y-%m-%d")
        except (AttributeError, TypeError, ValueError):
            date = "Unknown date"
        papers.append(f"[A{i}] {paper.title} — Published: {date} — {paper.pdf_url}")
    return papers

# ====== Summarization Logic ======
def summarize_research(query: str):
    """Summarize Tavily and arXiv search results for *query* with inline citations.

    Args:
        query: Research topic typed by the user. ``None`` or whitespace-only
            input is rejected with a warning message.

    Returns:
        A Markdown string: the model-written summary followed by a
        "Sources" list (web results first, then arXiv papers), or a warning
        message when the query is blank or neither search returned anything.
    """
    # Guard against None as well as blank text; None has no ``strip``.
    if not query or not query.strip():
        return "⚠️ Please enter a valid research topic."
    query = query.strip()

    web_results = search_web_tavily(query)
    paper_results = search_arxiv(query)

    # With nothing to cite, the model could only produce unsupported text,
    # so skip the LLM call entirely.
    if not web_results and not paper_results:
        return "⚠️ No sources were found for this topic. Try rephrasing the query."

    # Build structured context for the model; an empty source list is shown
    # as "- None" so the section is never blank.
    paper_block = "\n".join(paper_results) or "- None"
    web_block = "\n".join(web_results) or "- None"
    context = f"""
Query: {query}

arXiv Papers:
{paper_block}

Web Sources:
{web_block}
"""

    # Define how to summarize content
    system_prompt = """
You are a precise academic research assistant.
Write a concise, structured summary of the topic in two sections:
1. Findings from arXiv Papers
2. Findings from Web Sources

Guidelines:
- Use bullet points.
- Cite each statement with the corresponding source ID (e.g., (A1), (W3)).
- If a paper includes a publication date, mention it in parentheses after the title.
- Do NOT invent IDs or include sources not in the list.
- Always include information from both arXiv and Web sources.
"""

    messages = [
        SystemMessage(content=system_prompt),
        HumanMessage(content=context),
    ]

    # Generate summary
    response = llm.invoke(messages)

    # Append the raw source list so every cited ID can be looked up.
    sources = "\n".join(web_results + paper_results)
    return f"### 🧠 Summary\n{response.content}\n\n**Sources:**\n{sources}"

# ====== Gradio Interface ======
# Single-page layout: a query box and a button that sends the query to
# summarize_research, whose Markdown result is rendered in a scrollable box.
with gr.Blocks(
    title="AI Research Assistant",
    theme=gr.themes.Soft(),
    # pre-wrap keeps the single newlines of the "Sources" list visible.
    css="#summary_box {max-height: 900px; overflow-y: auto; white-space: pre-wrap;}"
) as demo:
    gr.Markdown(
        """
        # 🧠 AI Research Assistant  
        Enter a research topic to generate a concise academic-style report with citations.
        """
    )

    query_input = gr.Textbox(
        label="Research Query",
        placeholder="e.g., Latest advancements in quantum computing",
        lines=2,
    )

    submit_btn = gr.Button("🔍 Search & Summarize")
    output = gr.Markdown(label="Summary", elem_id="summary_box", show_copy_button=True)

    submit_btn.click(summarize_research, inputs=query_input, outputs=output)

    gr.Markdown("---\nMade with ❤️ using OpenAI + LangChain + Tavily + Gradio\n")

# Launch for Hugging Face
demo.launch()