Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import requests | |
| from dotenv import load_dotenv | |
| from markdownify import markdownify | |
| from requests.exceptions import RequestException | |
| from smolagents import ( | |
| LiteLLMModel, | |
| CodeAgent, | |
| ToolCallingAgent, | |
| InferenceClientModel, | |
| WebSearchTool, | |
| tool, | |
| FinalAnswerTool, | |
| WikipediaSearchTool, | |
| VisitWebpageTool, | |
| DuckDuckGoSearchTool | |
| ) | |
# Pull settings from a local .env file into the environment before any of the
# clients below are constructed.
load_dotenv()

from langfuse import Langfuse, get_client

# The Langfuse(...) instance itself is not kept: the module-level name
# `langfuse` is bound to whatever get_client() returns.
Langfuse(environment='PROD_V1')
langfuse = get_client()

# Report on stdout whether the Langfuse credentials were accepted.
print(
    "Langfuse client is authenticated and ready!"
    if langfuse.auth_check()
    else "Authentication failed. Please check your credentials and host."
)

from openinference.instrumentation.smolagents import SmolagentsInstrumentor

# Turn on the OpenInference instrumentation for smolagents.
SmolagentsInstrumentor().instrument()
# Model/provider shared by the agents defined below: reached through LiteLLM
# at the Nebius endpoint, with the API key read from NEBIUS_API_KEY.
model = LiteLLMModel(
    api_base="https://api.tokenfactory.nebius.com/v1/",
    api_key=os.getenv("NEBIUS_API_KEY"),
    model_id="openai/Qwen/Qwen3-Coder-480B-A35B-Instruct",
)
# Tools: each tool's docstring carries the usage instructions passed to the CodeAgent.
| from tool_clinical_trial import ClinicalTrialsSearchTool | |
def search_pubmed(topic: str, author: str) -> list[str]:
    """
    Searches the PubMed database for articles matching a topic and/or an author.

    Args:
        topic: The topic or keywords to search for (e.g., "CRISPR gene editing").
            Pass an empty string to search by author only.
        author: The name of the author to search for (e.g., "Albert Einstein").
            Pass an empty string to search by topic only.

    Returns:
        A list of PubMed IDs (strings) for up to 1000 matching articles, or an
        empty list when both topic and author are empty.

    Raises:
        requests.exceptions.HTTPError: If the API responds with an error status.
        requests.exceptions.Timeout: If the API does not answer within 30 seconds.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

    # Normalise the inputs so None and whitespace-only values count as "not given".
    topic = (topic or "").strip()
    author = (author or "").strip()

    terms = []
    if topic:
        terms.append(topic)
    if author:
        # The [Author] field tag restricts this term to the author field.
        terms.append(f"{author}[Author]")

    if not terms:
        # Nothing to search for: skip the request rather than send an empty
        # term, whose error payload would make the result lookup below fail.
        return []

    params = {
        "db": "pubmed",
        "term": " AND ".join(terms),
        "retmode": "json",
        "retmax": 1000,
    }

    # A timeout keeps a stalled connection from blocking the agent forever.
    response = requests.get(base_url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()["esearchresult"]["idlist"]
def parse_pdf(pdf_path:str)->list[str]:
    """
    Extracts the text of every page of a PDF file.

    Args:
        pdf_path: Local file path (string) of the PDF document to parse.
            When the agent runs in a remote environment, this path must be
            readable by the executing process (for example, the path of an
            uploaded file).

    Returns:
        A list of strings with one entry per page, in page order, each holding
        the text extracted from that page.
    """
    # Imported here so pypdf is only loaded when a PDF is actually parsed.
    from pypdf import PdfReader

    document = PdfReader(pdf_path)
    return [page.extract_text() for page in document.pages]
# Clinical-trial search agent. Its only tool is ClinicalTrialsSearchTool, so
# the description advertises exactly that capability and nothing else.
clinical_agent = CodeAgent(
    name="clinical_agent",
    description=(
        "Retrieve and parse clinical study data for a given disease. "
        "Use the ClinicalTrialsSearchTool() for any question related to clinical trials. "
        "Return structured tables or summaries as requested."
    ),
    tools=[ClinicalTrialsSearchTool()],
    additional_authorized_imports=["time", "numpy", "pandas"],
    # executor_type="blaxel", #executor_type="modal",
    return_full_result=True,
    planning_interval=3,  # run a planning step every 3 steps
    use_structured_outputs_internally=True,  # structured generation at each action step
    model=model,
    max_steps=6,
    verbosity_level=2,
)
# Online-research agent: web search, page visits, PubMed lookup and PDF parsing.
# search_pubmed and parse_pdf are plain functions in this file, while
# smolagents agents expect Tool instances, so both are wrapped with `tool`
# (which builds the tool description from their type hints and docstrings).
search_online_info = CodeAgent(
    name="search_online_info",
    description=(
        "Gather general or recent information from online sources. "
        "Use Wikipedia for overviews, DuckDuckGo for recent data, and VisitWebpageTool for specific URLs. "
        "Use search_pubmed to find PubMed articles by topic or author, and parse_pdf for full-text analysis of PDF files. "
        "Return structured summaries with sources."
    ),
    tools=[
        WikipediaSearchTool(),
        VisitWebpageTool(max_output_length=10000),
        DuckDuckGoSearchTool(max_results=5),
        tool(search_pubmed),
        tool(parse_pdf),
    ],
    additional_authorized_imports=["time", "numpy", "pandas"],
    # use_structured_outputs_internally=True,
    # executor_type="modal",
    planning_interval=2,  # run a planning step every 2 steps
    model=model,
    max_steps=4,
    verbosity_level=2,
)
# Top-level agent that answers the user's question. It currently holds every
# tool directly; search_pubmed and parse_pdf are plain functions in this file,
# so they are wrapped with `tool` to become the Tool instances smolagents expects.
# NOTE(review): the description refers to the managed agent clinical_agent, but
# managed_agents is commented out below — confirm which setup is intended.
manager_agent = CodeAgent(
    name="manager_agent",
    description=(
        "Most important task is to provide a complete answer to user questions based on clinical trial data and online information. "
        "Orchestrate workflow between clinical and online agents. "
        "Validate outputs, resolve conflicts, and ensure the final answer is complete and accurate. "
        "Primarily use the managed agent clinical_agent for questions related to clinical trials."
    ),
    tools=[
        FinalAnswerTool(),
        ClinicalTrialsSearchTool(),
        WikipediaSearchTool(),
        VisitWebpageTool(max_output_length=10000),
        DuckDuckGoSearchTool(max_results=5),
        tool(search_pubmed),
        tool(parse_pdf),
    ],
    model=model,
    # managed_agents=[clinical_agent,search_online_info],
    # executor_type="modal",
    provide_run_summary=True,
    additional_authorized_imports=["time", "numpy", "pandas"],
    use_structured_outputs_internally=True,  # structured generation at each action step
    verbosity_level=2,
    planning_interval=3,  # run a planning step every 3 steps
    max_steps=6,
)