Spaces:
Sleeping
Sleeping
| # app.py — AI Research Assistant (Hugging Face-ready) | |
| import os | |
| import arxiv | |
| import gradio as gr | |
| from tavily import TavilyClient | |
| from langchain_openai import ChatOpenAI | |
| from langchain_core.messages import SystemMessage, HumanMessage | |
# ====== Environment Setup ======
# Both API keys are read from the process environment; on Hugging Face they
# are supplied through Repo > Settings > Secrets.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

# Fail fast at import time when a key is absent or empty. The OpenAI key is
# checked first, then the Tavily key.
for _secret_name, _secret_value in (
    ("OPENAI_API_KEY", OPENAI_API_KEY),
    ("TAVILY_API_KEY", TAVILY_API_KEY),
):
    if not _secret_value:
        raise ValueError(
            f"β Missing {_secret_name}. Add it in Hugging Face β Settings β Secrets."
        )
del _secret_name, _secret_value

# ====== Initialize Clients ======
# Shared module-level clients used by the search and summarization functions.
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o-mini", temperature=0.2)
tavily = TavilyClient(api_key=TAVILY_API_KEY)
# ====== Search Functions ======
def search_web_tavily(query: str, k=5):
    """Run a Tavily web search and return labelled citation lines.

    Each returned string has the form ``[W<n>] <title> (<url>)``, numbered
    from 1 in the order Tavily returned the hits. At most ``k`` lines are
    produced; a response without a "results" entry yields an empty list.
    """
    response = tavily.search(query=query, max_results=k)
    hits = response.get("results", [])[:k]

    citations = []
    for position, hit in enumerate(hits, start=1):
        citations.append(f"[W{position}] {hit['title']} ({hit['url']})")
    return citations
def search_arxiv(query: str, k=5):
    """Search arXiv and return labelled citation lines.

    Args:
        query: Free-text search string passed to ``arxiv.Search``.
        k: Maximum number of papers to request.

    Returns:
        A list of strings, one per paper, numbered from 1 in the order the
        API yields them. Each line carries the ``[A<n>]`` label, the title,
        the publication date (``YYYY-MM-DD`` or "Unknown date") and the PDF
        URL.
    """
    search = arxiv.Search(query=query, max_results=k)
    # Search.results() is deprecated in arxiv.py 2.x; results are fetched
    # through a Client instead.
    client = arxiv.Client()

    papers = []
    for i, r in enumerate(client.results(search), start=1):
        # Format the publication date, falling back when it is missing
        # (AttributeError) or cannot be formatted (ValueError).
        try:
            date = r.published.strftime("%Y-%m-%d")
        except (AttributeError, ValueError):
            date = "Unknown date"
        papers.append(f"[A{i}] {r.title} β Published: {date} β {r.pdf_url}")
    return papers
| # ====== Summarization Logic ====== | |
| def summarize_research(query: str): | |
| """Combine Tavily + arXiv results and summarize them with inline citations.""" | |
| if not query.strip(): | |
| return "β οΈ Please enter a valid research topic." | |
| web_results = search_web_tavily(query) | |
| paper_results = search_arxiv(query) | |
| # Build structured context for the model | |
| context = f""" | |
| Query: {query} | |
| arXiv Papers: | |
| {chr(10).join(paper_results) or '- None'} | |
| Web Sources: | |
| {chr(10).join(web_results) or '- None'} | |
| """ | |
| # Define how to summarize content | |
| system_prompt = """ | |
| You are a precise academic research assistant. | |
| Write a concise, structured summary of the topic in two sections: | |
| 1. Findings from arXiv Papers | |
| 2. Findings from Web Sources | |
| Guidelines: | |
| - Use bullet points. | |
| - Cite each statement with the corresponding source ID (e.g., (A1), (W3)). | |
| - If a paper includes a publication date, mention it in parentheses after the title. | |
| - Do NOT invent IDs or include sources not in the list. | |
| - Always include information from both arXiv and Web sources. | |
| """ | |
| messages = [ | |
| SystemMessage(content=system_prompt), | |
| HumanMessage(content=context), | |
| ] | |
| # Generate summary | |
| response = llm.invoke(messages) | |
| # Combine results into readable markdown | |
| summary = f"### π§ Summary\n{response.content}\n\n**Sources:**\n" + \ | |
| "\n".join(web_results + paper_results) | |
| return summary | |
# ====== Gradio Interface ======
# Styling for the summary pane: capped height, vertical scrolling, and
# preserved line breaks.
_SUMMARY_BOX_CSS = "#summary_box {max-height: 900px; overflow-y: auto; white-space: pre-wrap;}"

# Heading and one-line description shown at the top of the page.
_INTRO_MARKDOWN = """
# π§ AI Research Assistant
Enter a research topic to generate a concise academic-style report with citations.
"""

with gr.Blocks(
    title="AI Research Assistant",
    theme=gr.themes.Soft(),
    css=_SUMMARY_BOX_CSS,
) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Input row: free-text query plus the button that triggers the pipeline.
    query_input = gr.Textbox(
        lines=2,
        label="Research Query",
        placeholder="e.g., Latest advancements in quantum computing",
    )
    submit_btn = gr.Button("π Search & Summarize")

    # Output pane; elem_id ties it to the CSS rule above.
    output = gr.Markdown(
        elem_id="summary_box",
        label="Summary",
        show_copy_button=True,
    )

    # Clicking the button runs summarize_research on the textbox value and
    # writes the returned markdown into the output pane.
    submit_btn.click(fn=summarize_research, inputs=query_input, outputs=output)

    gr.Markdown("---\nMade with β€οΈ using OpenAI + LangChain + Tavily + Gradio\n")

# Launch for Hugging Face
demo.launch()