Spaces:

samwell
/

medrax2

Paused

App Files Files Community

VictorLJZ committed on Jun 21, 2025

Commit

f237c31

1 Parent(s): e0efe4c

added llm factory and browsing capabilities

Browse files

Files changed (5) hide show

main.py +31 -18
medrax/models/__init__.py +5 -0
medrax/models/model_factory.py +120 -0
medrax/tools/__init__.py +1 -0
medrax/tools/web_browser.py +205 -0

main.py CHANGED Viewed

@@ -5,9 +5,7 @@ from dotenv import load_dotenv
 from transformers import logging
 from langgraph.checkpoint.memory import MemorySaver
-from langchain_openai import ChatOpenAI
-from langgraph.checkpoint.memory import MemorySaver
-from langchain_openai import ChatOpenAI
 from interface import create_demo
 from medrax.agent import *
@@ -25,10 +23,10 @@ def initialize_agent(
     model_dir="/model-weights",
     temp_dir="temp",
     device="cuda",
-    model="chatgpt-4o-latest",
     temperature=0.7,
     top_p=0.95,
-    openai_kwargs={}
 ):
     """Initialize the MedRAX agent with specified tools and configuration.
@@ -38,10 +36,10 @@ def initialize_agent(
         model_dir (str, optional): Directory containing model weights. Defaults to "/model-weights".
         temp_dir (str, optional): Directory for temporary files. Defaults to "temp".
         device (str, optional): Device to run models on. Defaults to "cuda".
-        model (str, optional): Model to use. Defaults to "chatgpt-4o-latest".
         temperature (float, optional): Temperature for the model. Defaults to 0.7.
         top_p (float, optional): Top P for the model. Defaults to 0.95.
-        openai_kwargs (dict, optional): Additional keyword arguments for OpenAI API, such as API key and base URL.
     Returns:
         Tuple[Agent, Dict[str, BaseTool]]: Initialized agent and dictionary of tool instances
@@ -65,6 +63,7 @@ def initialize_agent(
         ),
         "ImageVisualizerTool": lambda: ImageVisualizerTool(),
         "DicomProcessorTool": lambda: DicomProcessorTool(temp_dir=temp_dir),
     }
     # Initialize only selected tools or all if none specified
@@ -75,9 +74,22 @@ def initialize_agent(
             tools_dict[tool_name] = all_tools[tool_name]()
     checkpointer = MemorySaver()
-    model = ChatOpenAI(model=model, temperature=temperature, top_p=top_p, **openai_kwargs)
     agent = Agent(
-        model,
         tools=list(tools_dict.values()),
         log_tools=True,
         log_dir="logs",
@@ -105,18 +117,19 @@ if __name__ == "__main__":
         "ChestXRaySegmentationTool",
         "ChestXRayReportGeneratorTool",
         "XRayVQATool",
         # "LlavaMedTool",
         # "XRayPhraseGroundingTool",
         # "ChestXRayGeneratorTool",
     ]
-    # Collect the ENV variables
-    openai_kwargs = {}
-    if api_key := os.getenv("OPENAI_API_KEY"):
-        openai_kwargs["api_key"] = api_key
-    if base_url := os.getenv("OPENAI_BASE_URL"):
-        openai_kwargs["base_url"] = base_url
     agent, tools_dict = initialize_agent(
         "medrax/docs/system_prompts.txt",
@@ -124,10 +137,10 @@ if __name__ == "__main__":
         model_dir="/model-weights",  # Change this to the path of the model weights
         temp_dir="temp",  # Change this to the path of the temporary directory
         device="cuda",  # Change this to the device you want to use
-        model="gpt-4o",  # Change this to the model you want to use, e.g. gpt-4o-mini
         temperature=0.7,
         top_p=0.95,
-        openai_kwargs=openai_kwargs
     )
     demo = create_demo(agent, tools_dict)

 from transformers import logging
 from langgraph.checkpoint.memory import MemorySaver
+from medrax.models import ModelFactory
 from interface import create_demo
 from medrax.agent import *
     model_dir="/model-weights",
     temp_dir="temp",
     device="cuda",
+    model="gpt-4o",
     temperature=0.7,
     top_p=0.95,
+    model_kwargs={}
 ):
     """Initialize the MedRAX agent with specified tools and configuration.
         model_dir (str, optional): Directory containing model weights. Defaults to "/model-weights".
         temp_dir (str, optional): Directory for temporary files. Defaults to "temp".
         device (str, optional): Device to run models on. Defaults to "cuda".
+        model (str, optional): Model to use. Defaults to "gpt-4o".
         temperature (float, optional): Temperature for the model. Defaults to 0.7.
         top_p (float, optional): Top P for the model. Defaults to 0.95.
+        model_kwargs (dict, optional): Additional keyword arguments for model.
     Returns:
         Tuple[Agent, Dict[str, BaseTool]]: Initialized agent and dictionary of tool instances
         ),
         "ImageVisualizerTool": lambda: ImageVisualizerTool(),
         "DicomProcessorTool": lambda: DicomProcessorTool(temp_dir=temp_dir),
+        "WebBrowserTool": lambda: WebBrowserTool(),
     }
     # Initialize only selected tools or all if none specified
             tools_dict[tool_name] = all_tools[tool_name]()
     checkpointer = MemorySaver()
+    # Create the language model using the factory
+    try:
+        llm = ModelFactory.create_model(
+            model_name=model,
+            temperature=temperature,
+            top_p=top_p,
+            **model_kwargs
+        )
+    except ValueError as e:
+        print(f"Error creating language model: {e}")
+        print(f"Available model providers: {list(ModelFactory._model_providers.keys())}")
+        raise
     agent = Agent(
+        llm,
         tools=list(tools_dict.values()),
         log_tools=True,
         log_dir="logs",
         "ChestXRaySegmentationTool",
         "ChestXRayReportGeneratorTool",
         "XRayVQATool",
+        "WebBrowserTool",  # Add the web browser tool
         # "LlavaMedTool",
         # "XRayPhraseGroundingTool",
         # "ChestXRayGeneratorTool",
     ]
+    # Prepare any additional model-specific kwargs
+    model_kwargs = {}
+    # Set up API keys for the web browser tool
+    # You'll need to set these environment variables:
+    # - GOOGLE_SEARCH_API_KEY: Your Google Custom Search API key
+    # - GOOGLE_SEARCH_ENGINE_ID: Your Google Custom Search Engine ID
     agent, tools_dict = initialize_agent(
         "medrax/docs/system_prompts.txt",
         model_dir="/model-weights",  # Change this to the path of the model weights
         temp_dir="temp",  # Change this to the path of the temporary directory
         device="cuda",  # Change this to the device you want to use
+        model="gpt-4o",  # Change this to the model you want to use, e.g. gpt-4o-mini, gemini-2.5-pro
         temperature=0.7,
         top_p=0.95,
+        model_kwargs=model_kwargs
     )
     demo = create_demo(agent, tools_dict)

medrax/models/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+"""Model module for MedRAX."""
+from .model_factory import ModelFactory
+__all__ = ["ModelFactory"]

medrax/models/model_factory.py ADDED Viewed

	@@ -0,0 +1,120 @@

+"""Factory for creating language model instances based on model name."""
+import os
+from typing import Dict, Any, Type
+from langchain_core.language_models import BaseLanguageModel
+from langchain_openai import ChatOpenAI
+from langchain_google_genai import ChatGoogleGenerativeAI
+class ModelFactory:
+    """Factory for creating language model instances based on model name.
+    This class implements a registry of language model providers and provides
+    methods to create appropriate language model instances based on the model name.
+    """
+    # Registry of model providers
+    _model_providers = {
+        "gpt": {
+            "class": ChatOpenAI,
+            "env_key": "OPENAI_API_KEY",
+            "base_url_key": "OPENAI_BASE_URL"
+        },
+        "gemini": {
+            "class": ChatGoogleGenerativeAI,
+            "env_key": "GOOGLE_API_KEY"
+        },
+        # Add more providers with default configurations here
+    }
+    @classmethod
+    def register_provider(cls, prefix: str, model_class: Type[BaseLanguageModel],
+                          env_key: str, **kwargs) -> None:
+        """Register a new model provider.
+        Args:
+            prefix (str): The prefix used to identify this model provider (e.g., 'gpt', 'gemini')
+            model_class (Type[BaseLanguageModel]): The LangChain model class to use
+            env_key (str): The environment variable name for the API key
+            **kwargs: Additional provider-specific configuration
+        """
+        cls._model_providers[prefix] = {
+            "class": model_class,
+            "env_key": env_key,
+            **kwargs
+        }
+    @classmethod
+    def create_model(cls, model_name: str, temperature: float = 0.7,
+                     top_p: float = 0.95, **kwargs) -> BaseLanguageModel:
+        """Create and return an instance of the appropriate language model.
+        Args:
+            model_name (str): Name of the model to create (e.g., 'gpt-4o', 'gemini-2.5-pro')
+            temperature (float, optional): Temperature parameter. Defaults to 0.7.
+            top_p (float, optional): Top-p sampling parameter. Defaults to 0.95.
+            **kwargs: Additional model-specific parameters
+        Returns:
+            BaseLanguageModel: An initialized language model instance
+        Raises:
+            ValueError: If no provider is found for the given model name
+            ValueError: If the required API key is missing
+        """
+        # Find the matching provider based on model name prefix
+        provider_prefix = next(
+            (prefix for prefix in cls._model_providers if model_name.startswith(prefix)),
+            None
+        )
+        if not provider_prefix:
+            raise ValueError(
+                f"No provider found for model: {model_name}. "
+                f"Registered providers are for: {list(cls._model_providers.keys())}"
+            )
+        provider = cls._model_providers[provider_prefix]
+        model_class = provider["class"]
+        env_key = provider["env_key"]
+        # Set up provider-specific kwargs
+        provider_kwargs = {}
+        # Handle API key
+        if env_key in os.environ:
+            provider_kwargs["api_key"] = os.environ[env_key]
+        else:
+            # Log warning but don't fail - the model class might handle missing API keys differently
+            print(f"Warning: Environment variable {env_key} not found. Authentication may fail.")
+        # Check for base_url if applicable
+        if "base_url_key" in provider and provider["base_url_key"] in os.environ:
+            provider_kwargs["base_url"] = os.environ[provider["base_url_key"]]
+        # Merge with any additional provider-specific settings from the registry
+        for k, v in provider.items():
+            if k not in ["class", "env_key", "base_url_key"]:
+                provider_kwargs[k] = v
+        # Create and return the model instance
+        return model_class(
+            model=model_name,
+            temperature=temperature,
+            top_p=top_p,
+            **provider_kwargs,
+            **kwargs
+        )
+    @classmethod
+    def list_providers(cls) -> Dict[str, Dict[str, Any]]:
+        """List all registered model providers.
+        Returns:
+            Dict[str, Dict[str, Any]]: Dictionary of registered providers and their configurations
+        """
+        # Return a copy to prevent accidental modification
+        return {k: {kk: vv for kk, vv in v.items() if kk != "class"}
+                for k, v in cls._model_providers.items()}

medrax/tools/__init__.py CHANGED Viewed

@@ -9,3 +9,4 @@ from .grounding import *
 from .generation import *
 from .dicom import *
 from .utils import *

 from .generation import *
 from .dicom import *
 from .utils import *
+from .web_browser import *

medrax/tools/web_browser.py ADDED Viewed

	@@ -0,0 +1,205 @@

+"""Web browser tool for MedRAX2.
+This module implements a web browsing tool for MedRAX2, allowing the agent
+to search the web, visit URLs, and extract information from web pages.
+"""
+import asyncio
+import json
+import os
+import re
+from typing import Dict, Optional, Any
+from urllib.parse import urljoin, urlparse
+import requests
+from bs4 import BeautifulSoup
+from langchain_core.tools import BaseTool
+from pydantic import BaseModel, Field
+class SearchQuerySchema(BaseModel):  # search-only argument model; WebBrowserTool in this module defines its own combined schema
+    """Schema for web search queries."""
+    query: str = Field(..., description="The search query string")  # required: `...` means no default
+class VisitUrlSchema(BaseModel):  # visit-only argument model; WebBrowserTool in this module defines its own combined schema
+    """Schema for URL visits."""
+    url: str = Field(..., description="The URL to visit")  # required: `...` means no default
+class WebBrowserTool(BaseTool):
+    """Tool for browsing the web, searching for information, and visiting URLs.
+    This tool provides the agent with internet browsing capabilities, including:
+    1. Performing web searches using a search engine API
+    2. Visiting specific URLs and extracting their content
+    3. Following links within pages
+    """
+    name: str = "WebBrowserTool"
+    description: str = "Search the web for information or visit specific URLs to retrieve content"
+    search_api_key: Optional[str] = None
+    search_engine_id: Optional[str] = None
+    user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+    max_results: int = 5
+    def __init__(self, search_api_key: Optional[str] = None, search_engine_id: Optional[str] = None, **kwargs):
+        """Initialize the web browser tool.
+        Args:
+            search_api_key: Google Custom Search API key (optional)
+            search_engine_id: Google Custom Search Engine ID (optional)
+            **kwargs: Additional keyword arguments
+        """
+        super().__init__(**kwargs)
+        # Try to get API keys from environment variables if not provided
+        self.search_api_key = search_api_key or os.environ.get("GOOGLE_SEARCH_API_KEY")
+        self.search_engine_id = search_engine_id or os.environ.get("GOOGLE_SEARCH_ENGINE_ID")
+    def search_web(self, query: str) -> Dict[str, Any]:
+        """Search the web using Google Custom Search API.
+        Args:
+            query: The search query string
+        Returns:
+            Dict containing search results
+        """
+        if not self.search_api_key or not self.search_engine_id:
+            return {
+                "error": "Search API key or engine ID not configured. Please set GOOGLE_SEARCH_API_KEY and GOOGLE_SEARCH_ENGINE_ID environment variables."
+            }
+        url = "https://www.googleapis.com/customsearch/v1"
+        params = {
+            "key": self.search_api_key,
+            "cx": self.search_engine_id,
+            "q": query,
+            "num": self.max_results
+        }
+        try:
+            response = requests.get(url, params=params, timeout=10)
+            response.raise_for_status()
+            results = response.json()
+            if "items" not in results:
+                return {"results": [], "message": "No results found"}
+            formatted_results = []
+            for item in results["items"]:
+                formatted_results.append({
+                    "title": item.get("title"),
+                    "link": item.get("link"),
+                    "snippet": item.get("snippet"),
+                    "source": item.get("displayLink")
+                })
+            return {
+                "results": formatted_results,
+                "message": f"Found {len(formatted_results)} results for query: {query}"
+            }
+        except Exception as e:
+            return {"error": f"Search failed: {str(e)}"}
+    def visit_url(self, url: str) -> Dict[str, Any]:
+        """Visit a URL and extract its content.
+        Args:
+            url: The URL to visit
+        Returns:
+            Dict containing the page content, title, and metadata
+        """
+        try:
+            # Validate URL
+            parsed_url = urlparse(url)
+            if not parsed_url.scheme or not parsed_url.netloc:
+                return {"error": f"Invalid URL: {url}"}
+            headers = {"User-Agent": self.user_agent}
+            response = requests.get(url, headers=headers, timeout=15)
+            response.raise_for_status()
+            # Parse the HTML content
+            soup = BeautifulSoup(response.text, "html.parser")
+            # Extract title
+            title = soup.title.string if soup.title else "No title"
+            # Extract main content (remove scripts, styles, etc.)
+            for script in soup(["script", "style", "meta", "noscript"]):
+                script.extract()
+            # Get text content
+            text_content = soup.get_text(separator="\n", strip=True)
+            # Clean up whitespace
+            text_content = re.sub(r'\n+', '\n', text_content)
+            text_content = re.sub(r' +', ' ', text_content)
+            # Extract links
+            links = []
+            for link in soup.find_all("a", href=True):
+                href = link["href"]
+                # Handle relative URLs
+                if href.startswith("/"):
+                    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+                    href = base_url + href
+                if href.startswith(("http://", "https://")):
+                    links.append({
+                        "text": link.get_text(strip=True) or href,
+                        "url": href
+                    })
+            # Extract images (limited to first 3)
+            images = []
+            for i, img in enumerate(soup.find_all("img", src=True)[:3]):
+                src = img["src"]
+                # Handle relative URLs
+                if src.startswith("/"):
+                    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
+                    src = base_url + src
+                if src.startswith(("http://", "https://")):
+                    images.append(src)
+            return {
+                "title": title,
+                "content": text_content[:10000] if len(text_content) > 10000 else text_content,
+                "url": url,
+                "links": links[:10],  # Limit to 10 links
+                "images": images,
+                "content_type": response.headers.get("Content-Type", ""),
+                "content_length": len(text_content),
+                "truncated": len(text_content) > 10000
+            }
+        except Exception as e:
+            return {"error": f"Failed to visit {url}: {str(e)}"}
+    async def _arun(self, query: str = "", url: str = "") -> str:
+        """Run the tool asynchronously."""
+        return json.dumps(self._run(query=query, url=url))
+    def _run(self, query: str = "", url: str = "") -> Dict[str, Any]:
+        """Run the web browser tool.
+        Args:
+            query: Search query (if searching)
+            url: URL to visit (if visiting a specific page)
+        Returns:
+            Dict containing the results
+        """
+        if url:
+            return self.visit_url(url)
+        elif query:
+            return self.search_web(query)
+        else:
+            return {"error": "Please provide either a search query or a URL to visit"}
+    def args_schema(self) -> type[BaseModel]:
+        """Return the schema for the tool arguments."""
+        class WebBrowserSchema(BaseModel):
+            """Combined schema for web browser tool."""
+            query: str = Field("", description="The search query (leave empty if visiting a URL)")
+            url: str = Field("", description="The URL to visit (leave empty if performing a search)")
+        return WebBrowserSchema