"""
File: web_app/module_agent_arxiv.py
Description: an agent with a tool to search arXiv papers.
Author: Didier Guillevic
Date: 2025-10-21
"""

import asyncio
import uuid

import arxiv
import gradio as gr
import pydantic
from google.adk.agents import Agent
from google.adk.runners import Runner
from google.adk.sessions import InMemorySessionService
from google.adk.tools import google_search
from google.genai import types

APP_NAME = "arxiv_agent"
SESSION_ID = "1234"
model = "gemini-2.5-flash"

# Bounds applied to the `max_results` argument of the arXiv tool.
_MIN_RESULTS = 1
_MAX_RESULTS = 10

# One session service for the whole process, so that a given user_id keeps
# its conversation history from one request to the next.
# NOTE: sessions are kept in memory and are never evicted.
_session_service = InMemorySessionService()


#
# ===== tool: arXiv search =====
#
class PaperArxivInfo(pydantic.BaseModel):
    """Subset of the metadata of one arXiv paper returned by the search tool."""

    paper_id: str       # short arXiv identifier (from `get_short_id()`)
    title: str
    authors: list[str]  # author names
    summary: str        # abstract
    pdf_url: str
    published: str      # publication date formatted as YYYY-MM-DD


# NOTE: the docstring below is kept with its original wording on purpose: the
# function is handed to the agent as a tool, and the docstring serves as the
# tool description.
def search_arxiv(query: str, max_results: int = 3) -> list[PaperArxivInfo]:
    """Search for scientific papers on arXiv. By default returns only top 3 results unless otherwise requested by user.

    Parameters:
        query: The search query.
        max_results: Maximum number of results (typically between 1 and 10). Default is 3.
    """
    print(f"[DEBUG] Tool search_arxiv received: query={query!r}, max_results={max_results!r}")

    # max_results is set to 3000000000000000 by the agent when not specified,
    # so we cap it here. Values below 1 are raised to 1 so that the request
    # sent to arXiv always asks for a valid, positive number of results.
    clamped = max(_MIN_RESULTS, min(max_results, _MAX_RESULTS))
    if clamped != max_results:
        max_results = clamped
        print(f"max_results capped to: {max_results}")

    search = arxiv.Search(
        query=query,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )

    papers = []
    for result in arxiv.Client().results(search):
        paper_info = PaperArxivInfo(
            paper_id=result.get_short_id(),
            title=result.title,
            authors=[author.name for author in result.authors],
            summary=result.summary,
            pdf_url=result.pdf_url,
            published=result.published.strftime("%Y-%m-%d"),
        )
        print(f"{paper_info=}")
        papers.append(paper_info)

    return papers


#
# ===== agent =====
#
root_agent = Agent(
    name="arxiv_search_agent",
    model=model,
    description=(
        "Agent to answer questions with the option to call arXiv Search "
        "if needed for up-to-date scientific paper information."
    ),
    instruction=(
        "I can answer your questions from my own knowledge or by searching the "
        "arXiv paper repository using the arXiv Search tool. When returning "
        "results about arXiv papers, I should provide paper titles, authors, "
        "summaries, as well as links to the papers. "
        "Just ask me anything!"
    ),
    tools=[search_arxiv],
)


#
# ==== Session and Runner =====
#
async def setup_session_and_runner(user_id: str):
    """Return the session of `user_id` (created on first use) and a runner.

    The session is looked up in the shared, module-level session service and
    only created when it does not exist yet, so consecutive calls with the
    same `user_id` continue the same conversation.

    Args:
        user_id: Identifier of the user owning the session.

    Returns:
        A `(session, runner)` tuple; the runner is bound to `root_agent` and
        to the shared session service.
    """
    session = await _session_service.get_session(
        app_name=APP_NAME, user_id=user_id, session_id=SESSION_ID
    )
    if session is None:
        session = await _session_service.create_session(
            app_name=APP_NAME, user_id=user_id, session_id=SESSION_ID
        )

    runner = Runner(
        agent=root_agent,
        app_name=APP_NAME,
        session_service=_session_service,
    )
    return session, runner


#
# ==== Call Agent Asynchronously =====
#
async def call_agent_async(query: str, user_id: str) -> str:
    """Send `query` to the agent and return the text of its final response.

    Args:
        query: The user message.
        user_id: Identifier of the user whose session is used.

    Returns:
        The text of the first part of the last final-response event, or an
        empty string when no final response carried any text.
    """
    content = types.Content(role='user', parts=[types.Part(text=query)])
    _session, runner = await setup_session_and_runner(user_id=user_id)
    events = runner.run_async(
        user_id=user_id,
        session_id=SESSION_ID,
        new_message=content,
    )

    final_response = ""
    async for event in events:
        if not event.is_final_response():
            continue
        # A final event is not guaranteed to carry content or parts; skip it
        # in that case instead of failing on a missing attribute / index.
        if event.content and event.content.parts:
            final_response = event.content.parts[0].text or ""

    return final_response


#
# ===== User interface Block =====
#
# NOTE: the docstring below is kept with its original wording on purpose: the
# app is launched with `mcp_server=True` and the docstring documents the
# exposed function.
def agent_arxiv_search(query: str, user_id=None):
    """Calls a language model agent with arXiv Search tool to answer the query.

    Args:
        query (str): The user query.
        user_id (str, optional): The user ID for session management. If None, a new ID is generated. Defaults to None.

    Returns:
        the agent's response (str).
    """
    if user_id is None:
        user_id = str(uuid.uuid4())  # Generate a unique user ID

    # Nothing to ask the model: answer directly instead of sending an empty
    # message to the agent.
    if not query or not query.strip():
        return "Please enter a query.", user_id

    # The returned user_id is stored by the UI and passed back on the next
    # call, which keeps the user on the same session.
    response = asyncio.run(call_agent_async(query, user_id))
    return response, user_id


with gr.Blocks() as demo:
    gr.Markdown(
        """
        **Agent with arXiv search tool**: be patient :-) Currently looking into (async) streaming support...
        """
    )

    with gr.Row():
        input_text = gr.Textbox(
            lines=2,
            placeholder="Enter your query here...",
            label="Query",
            render=True
        )

    # Per-browser-session user id; filled in by the first call to the agent.
    user_id = gr.State(None)

    with gr.Row():
        submit_button = gr.Button("Submit", variant="primary")
        clear_button = gr.Button("Clear", variant="secondary")

    with gr.Row():
        output_text = gr.Markdown(
            label="Agent Response",
            render=True
        )

    with gr.Accordion("Examples", open=False):
        examples = gr.Examples(
            examples=[
                ["What is the prime number factorization of 21?",],  # answerable without any search
                ["Can you search for a few papers on arXiv related to privacy preserving machine learning applied to language models.",],
                ["Find five papers on arXiv about graph neural networks applied to financial transactions.",],
            ],
            inputs=[input_text,],
            cache_examples=False,
            label="Click to use an example"
        )

    # ===== Button Actions =====
    submit_button.click(
        fn=agent_arxiv_search,
        inputs=[input_text, user_id],
        outputs=[output_text, user_id]
    )
    # Clears the query box and the displayed answer (the user id is kept).
    clear_button.click(
        fn=lambda : ('', None),
        inputs=None,
        outputs=[input_text, output_text]
    )


if __name__ == "__main__":
    demo.launch(mcp_server=True)