# services/master_tools.py
import logging
import os
from typing import Any, Callable, Dict, List, Optional

from pydantic import BaseModel, Field, model_validator
from langchain_core.tools import tool

# Import your remote utilities
from utilities.extract_text import extract_text_remote
from utilities.extract_tables import extract_tables_remote
from utilities.describe_images import describe_images_remote
from utilities.summarizer import summarize_remote
from utilities.classify import classify_remote
from utilities.ner import ner_remote
from utilities.translator import translate_remote
from utilities.signature_verification import signature_verification_remote
from utilities.stamp_detection import stamp_detection_remote

logger = logging.getLogger(__name__)


# ---------- Agent Integration (Phase 1) ----------

def _use_agents() -> bool:
    """Check if agent mode is enabled via USE_AGENTS environment variable.

    Only the value "true" (case-insensitive) enables agents; an unset
    variable defaults to "false".
    """
    return os.getenv("USE_AGENTS", "false").lower() == "true"


def _get_agent_if_enabled(agent_name: str) -> Optional[Any]:
    """Get agent from registry if USE_AGENTS=true, otherwise return None.

    Args:
        agent_name: Registry key passed to ``get_agent``.

    Returns:
        Whatever ``get_agent`` returns, or None when agent mode is disabled
        or the registry import/lookup raises.
    """
    if not _use_agents():
        return None
    try:
        # Imported lazily so that a missing/broken agent package only matters
        # when agent mode is actually switched on.
        from services.agents.agent_registry import get_agent

        return get_agent(agent_name)
    except Exception as e:
        # Deliberate best-effort boundary: any failure in the agent system
        # falls back to the utilities; the failure is logged, not raised.
        logger.warning(
            "Agent system unavailable (%s), using utility fallback", e
        )
        return None


# ---------- Shared helpers ----------

def _base_state(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Build the base state your utilities expect.

    Returns a dict with:
      - "filename": basename of file_path
      - "temp_files": {filename: file_path}
      - "start_page" / "end_page": passed through unchanged
    """
    filename = os.path.basename(file_path)
    return {
        "filename": filename,
        "temp_files": {filename: file_path},
        "start_page": start_page,
        "end_page": end_page,
    }


def _text_or_file_state(
    text: Optional[str],
    file_path: Optional[str],
    start_page: int,
    end_page: int,
    **extra: Any,
) -> Dict[str, Any]:
    """Build the state for tools that accept raw text and/or a file.

    The state always carries "text", "start_page" and "end_page" plus any
    ``extra`` entries; when ``file_path`` is truthy the keys produced by
    ``_base_state`` are merged in as well.
    """
    state: Dict[str, Any] = {
        "text": text,
        "start_page": start_page,
        "end_page": end_page,
        **extra,
    }
    if file_path:
        state.update(_base_state(file_path, start_page, end_page))
    return state


def _dispatch(
    agent_name: str,
    fallback: Callable[[Dict[str, Any]], Any],
    state: Dict[str, Any],
) -> Any:
    """Run ``state`` through the named agent if one is available, else the utility.

    Args:
        agent_name: Registry key handed to ``_get_agent_if_enabled``.
        fallback: Utility function called with ``state`` when no agent is used.
        state: State dict forwarded unchanged to whichever backend runs.

    Returns:
        The raw result of ``agent.run(state)`` or ``fallback(state)``.
    """
    agent = _get_agent_if_enabled(agent_name)
    if agent:
        return agent.run(state)
    return fallback(state)


def _unwrap(payload: Any, key: str) -> Any:
    """Return ``payload[key]`` when present, otherwise the whole payload."""
    return payload.get(key, payload)


# ---------- Arg Schemas ----------
# NOTE: the schema classes intentionally carry no docstrings; they are
# documented with comments so the generated JSON schema stays unchanged.

# Arguments for tools that operate on a page range of a file on disk.
class FileSpanArgs(BaseModel):
    file_path: str = Field(..., description="Absolute/local path to the uploaded file")
    start_page: int = Field(1, description="Start page (1-indexed)", ge=1)
    end_page: int = Field(1, description="End page (inclusive, 1-indexed)", ge=1)


# Arguments for tools that accept raw text, a file, or both.
class TextOrFileArgs(BaseModel):
    text: Optional[str] = Field(None, description="Raw text to process")
    file_path: Optional[str] = Field(None, description="Path to a document on disk (PDF/Image)")
    start_page: int = Field(1, description="Start page (1-indexed)", ge=1)
    end_page: int = Field(1, description="End page (inclusive, 1-indexed)", ge=1)

    @model_validator(mode="after")
    def validate_sources(self):
        # Truthiness (not `is None`) so that empty strings count as missing.
        if not self.text and not self.file_path:
            raise ValueError("Provide either text or file_path.")
        return self


# TextOrFileArgs plus the mandatory translation target.
class TranslateArgs(TextOrFileArgs):
    target_lang: str = Field(..., description="Target language code or name (e.g., 'es' or 'Spanish')")


# Arguments for the terminal "finalize" tool.
class FinalizeArgs(BaseModel):
    content: Dict[str, Any] = Field(..., description="JSON payload to return directly to the user")


# ---------- Tools ----------
# NOTE: tool docstrings double as the description the @tool decorator exposes,
# so their wording is kept as-is; implementation notes live in `#` comments.

@tool("extract_text", args_schema=FileSpanArgs)
def extract_text_tool(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Extract text from a document between start_page and end_page (inclusive).
    Use this when the user asks to read, analyze, or summarize document text.
    Returns: {"text": "..."}
    """
    payload = _dispatch(
        "extract_text",
        extract_text_remote,
        _base_state(file_path, start_page, end_page),
    )
    # Accept either key for compatibility; normalise a missing/empty value to "".
    return {"text": payload.get("text") or payload.get("extracted_text") or ""}


@tool("extract_tables", args_schema=FileSpanArgs)
def extract_tables_tool(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Extract tables from a document between start_page and end_page.
    Returns: {"tables": [...], "table_count": int}
    """
    payload = _dispatch(
        "extract_tables",
        extract_tables_remote,
        _base_state(file_path, start_page, end_page),
    )
    # `or []` also covers an explicit `"tables": None`, which would otherwise
    # make len() raise TypeError.
    tables = payload.get("tables") or []
    return {"tables": tables, "table_count": len(tables)}


@tool("describe_images", args_schema=FileSpanArgs)
def describe_images_tool(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Generate captions/descriptions for images in the specified page range.
    Returns: {"image_descriptions": ...}
    """
    payload = _dispatch(
        "describe_images",
        describe_images_remote,
        _base_state(file_path, start_page, end_page),
    )
    return {"image_descriptions": _unwrap(payload, "image_descriptions")}


@tool("summarize_text", args_schema=TextOrFileArgs)
def summarize_text_tool(
    text: Optional[str] = None,
    file_path: Optional[str] = None,
    start_page: int = 1,
    end_page: int = 1,
) -> Dict[str, Any]:
    """
    Summarize either raw text or a document (by file_path + optional page span).
    Returns: {"summary": "..."}
    """
    state = _text_or_file_state(text, file_path, start_page, end_page)
    payload = _dispatch("summarize", summarize_remote, state)
    return {"summary": _unwrap(payload, "summary")}


@tool("classify_text", args_schema=TextOrFileArgs)
def classify_text_tool(
    text: Optional[str] = None,
    file_path: Optional[str] = None,
    start_page: int = 1,
    end_page: int = 1,
) -> Dict[str, Any]:
    """
    Classify a text or document content.
    Returns: {"classification": ...}
    """
    state = _text_or_file_state(text, file_path, start_page, end_page)
    payload = _dispatch("classify", classify_remote, state)
    return {"classification": _unwrap(payload, "classification")}


@tool("extract_entities", args_schema=TextOrFileArgs)
def extract_entities_tool(
    text: Optional[str] = None,
    file_path: Optional[str] = None,
    start_page: int = 1,
    end_page: int = 1,
) -> Dict[str, Any]:
    """
    Perform Named Entity Recognition (NER) on text or a document.
    Returns: {"ner": ...}
    """
    state = _text_or_file_state(text, file_path, start_page, end_page)
    payload = _dispatch("ner", ner_remote, state)
    return {"ner": _unwrap(payload, "ner")}


@tool("translate_text", args_schema=TranslateArgs)
def translate_text_tool(
    target_lang: str,
    text: Optional[str] = None,
    file_path: Optional[str] = None,
    start_page: int = 1,
    end_page: int = 1,
) -> Dict[str, Any]:
    """
    Translate text or a document to target_lang (e.g., 'es', 'fr', 'de', 'Spanish').
    Returns: {"translation": "...", "target_lang": "..."}
    """
    state = _text_or_file_state(
        text, file_path, start_page, end_page, target_lang=target_lang
    )
    payload = _dispatch("translate", translate_remote, state)
    return {
        "translation": _unwrap(payload, "translation"),
        "target_lang": target_lang,
    }


@tool("signature_verification", args_schema=FileSpanArgs)
def signature_verification_tool(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Verify signatures/stamps presence and authenticity indicators in specified page range.
    Returns: {"signature_verification": ...}
    """
    payload = _dispatch(
        "signature_verification",
        signature_verification_remote,
        _base_state(file_path, start_page, end_page),
    )
    return {"signature_verification": _unwrap(payload, "signature_verification")}


@tool("stamp_detection", args_schema=FileSpanArgs)
def stamp_detection_tool(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Detect stamps in a document in the specified page range.
    Returns: {"stamp_detection": ...}
    """
    payload = _dispatch(
        "stamp_detection",
        stamp_detection_remote,
        _base_state(file_path, start_page, end_page),
    )
    return {"stamp_detection": _unwrap(payload, "stamp_detection")}


@tool("finalize", args_schema=FinalizeArgs, return_direct=True)
def finalize_tool(content: Dict[str, Any]) -> Dict[str, Any]:
    """
    FINAL STEP ONLY.
    Call this at the end to return a concise JSON result to the UI.
    Whatever you pass in 'content' is returned directly and ends the run.
    """
    return content


def get_master_tools() -> List[Any]:
    """
    Export all tools for agent binding.

    Returns the ten tool objects defined in this module, with
    ``finalize_tool`` last.
    """
    return [
        extract_text_tool,
        extract_tables_tool,
        describe_images_tool,
        summarize_text_tool,
        classify_text_tool,
        extract_entities_tool,
        translate_text_tool,
        signature_verification_tool,
        stamp_detection_tool,
        finalize_tool,
    ]