Spaces:
Running
Running
"""
File: web_app/module_agent_arxiv.py
Description: an agent with a tool to search arXiv papers.
Author: Didier Guillevic
Date: 2025-10-23
"""
| import agents # openai-agents package | |
| from agents.extensions.models.litellm_model import LitellmModel | |
| import asyncio | |
| import pydantic | |
| import os | |
| import arxiv | |
| import openai | |
| import gradio as gr | |
| import logging | |
# Module-wide logging: INFO and above goes to the root handler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

#
# ===== Language model configuration =====
#
# ----- Locally hosted
# ----- Cloud models
gemini_model_id = "gemini/gemini-2.5-flash"
# Raises KeyError at import time when GEMINI_API_KEY is not set.
gemini_api_key = os.environ["GEMINI_API_KEY"]
model = LitellmModel(api_key=gemini_api_key, model=gemini_model_id)

# Sampling / generation settings shared by every agent run.
model_settings = agents.ModelSettings(
    temperature=0.3,
    top_p=0.9,
    max_tokens=2_048,
    include_usage=True,
)
#
# ===== Tool to search arXiv papers =====
#
# Record describing a single arXiv paper; search_arxiv builds one instance
# per search result. Field order is kept as-is because it determines the
# order of the serialized output.
class PaperArxivInfo(pydantic.BaseModel):
    paper_id: str       # short arXiv identifier (from Result.get_short_id())
    title: str          # paper title
    authors: list[str]  # author names
    summary: str        # abstract text
    pdf_url: str        # link to the paper's PDF
    published: str      # publication date formatted as YYYY-MM-DD
def search_arxiv(query: str, max_results: int = 3) -> list[PaperArxivInfo]:
    """Search for scientific papers on arXiv. By default returns only top 3 results unless otherwise requested by user.

    Args:
        query: The search query.
        max_results: Maximum number of results (typically between 1 and 10).
            Default is 3. Values outside the 1-10 range are clamped.

    Returns:
        One PaperArxivInfo per paper found, sorted by relevance.
    """
    logger.info(
        "Calling search_arxiv with query: %s and max_results: %s",
        query,
        max_results,
    )

    # max_results (when not specified) is set to 3000000000000000 by some of
    # the open source LLMs, and nothing prevents a zero or negative value
    # either. Hence, we clamp to the supported 1..10 range here.
    requested = max_results
    max_results = max(1, min(max_results, 10))
    if max_results != requested:
        logger.debug("max_results capped to: %s", max_results)

    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )

    papers = []
    for result in arxiv.Client().results(search):
        paper_info = PaperArxivInfo(
            paper_id=result.get_short_id(),
            title=result.title,
            authors=[author.name for author in result.authors],
            summary=result.summary,
            # NOTE(review): pdf_url may be missing for some entries; fall back
            # to the entry URL so validation of the `str` field cannot fail.
            pdf_url=result.pdf_url or result.entry_id,
            published=result.published.strftime("%Y-%m-%d"),
        )
        logger.debug("paper_info=%r", paper_info)
        papers.append(paper_info)
    return papers
#
# ===== Define the agent =====
#
# Agent.tools expects tool objects rather than bare callables, so expose the
# search function as a function tool. The isinstance guard keeps this correct
# if search_arxiv has already been wrapped (e.g. by a decorator).
_search_arxiv_tool = (
    search_arxiv
    if isinstance(search_arxiv, agents.FunctionTool)
    else agents.function_tool(search_arxiv)
)

agent = agents.Agent(
    name="Arxiv searcher",
    instructions=(
        "You are a friendly agent who can answer questions from your own "
        "knowledge or by searching the arXiv paper repository using the "
        "arXiv Search tool. "
        "When returning results about arXiv papers, you should provide "
        "the following information: paper titles, authors, summaries, "
        "as well as the url links to the papers. "
    ),
    tools=[_search_arxiv_tool],
    model=model,
    model_settings=model_settings,
)
async def agent_arxiv_search(query: str):
    """Run the arXiv search agent with the given query, streaming the response.

    Args:
        query (str): The input query for the agent.

    Yields:
        str: The response accumulated so far, re-emitted after each new text
            delta. If the run fails, a final "Error: ..." message is yielded
            instead.
    """
    response = ""
    try:
        # Started inside the try block so that a failure while launching the
        # run is reported the same way as a failure while streaming.
        result = agents.Runner.run_streamed(agent, input=query)
        async for event in result.stream_events():
            if event.type != "raw_response_event":
                continue
            # Only text deltas contribute to the visible answer.
            if isinstance(
                event.data, openai.types.responses.ResponseTextDeltaEvent
            ):
                response += event.data.delta
                yield response
    except Exception as e:
        # UI boundary: record the traceback, then surface a readable message
        # instead of letting the stream die silently.
        logger.exception("agent_arxiv_search failed for query: %s", query)
        yield f"Error: {str(e)}"
#
# ===== Gradio web app =====
#
with gr.Blocks() as demo:
    gr.Markdown("**Agent with arXiv search tool**")

    # Query input
    with gr.Row():
        input_text = gr.Textbox(
            label="Query",
            placeholder="Enter your query here...",
            lines=2,
            render=True,
        )

    # Action buttons
    with gr.Row():
        submit_button = gr.Button("Submit", variant="primary")
        clear_button = gr.Button("Clear", variant="secondary")

    # Agent answer, rendered as Markdown
    with gr.Row():
        output_text = gr.Markdown(label="Agent Response", render=True)

    # Canned queries that can be loaded into the query box
    with gr.Accordion("Examples", open=False):
        examples = gr.Examples(
            label="Click to use an example",
            examples=[
                # Answerable from the model's own knowledge; no arXiv search needed.
                ["What is the prime number factorization of 21?"],
                ["Can you search for a few papers on arXiv related to privacy preserving machine learning applied to language models."],
                ["Find five papers on arXiv about graph neural networks applied to financial transactions such as Bitcoin transfers using an inductive unsupervised approach."],
            ],
            inputs=[input_text],
            cache_examples=False,
        )

    # ===== Button Actions =====
    # Submit streams the agent's answer into the output area.
    submit_button.click(
        fn=agent_arxiv_search,
        inputs=[input_text],
        outputs=[output_text],
    )
    # Clear resets both the query box and the answer area.
    clear_button.click(
        fn=lambda: ('', None),
        inputs=None,
        outputs=[input_text, output_text],
    )

if __name__ == "__main__":
    demo.launch(mcp_server=True)