Spaces:
Sleeping
Sleeping
File size: 10,936 Bytes
a3b18ea 6df13ef a3b18ea 6df13ef a3b18ea 6df13ef a3b18ea 6df13ef a3b18ea 6df13ef a3b18ea 6df13ef a3b18ea 6df13ef a3b18ea 6df13ef a3b18ea 6df13ef a3b18ea 6df13ef a3b18ea bf45da8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 |
# services/master_tools.py
from typing import Optional, Dict, Any, List
from pydantic import BaseModel, Field, model_validator
from langchain_core.tools import tool
import os
# Import your remote utilities
from utilities.extract_text import extract_text_remote
from utilities.extract_tables import extract_tables_remote
from utilities.describe_images import describe_images_remote
from utilities.summarizer import summarize_remote
from utilities.classify import classify_remote
from utilities.ner import ner_remote
from utilities.translator import translate_remote
from utilities.signature_verification import signature_verification_remote
from utilities.stamp_detection import stamp_detection_remote
# ---------- Agent Integration (Phase 1) ----------
def _use_agents() -> bool:
    """Report whether agent mode is switched on.

    The ``USE_AGENTS`` environment variable is read on every call. Agent mode
    counts as enabled only when the value equals ``true``; the comparison is
    case-insensitive and ignores leading/trailing whitespace, so a value that
    picked up a stray space or newline still enables the feature. An unset
    variable is treated as ``false``.

    Returns:
        True when agent mode is enabled, False otherwise.
    """
    raw_flag = os.getenv("USE_AGENTS", "false")
    # Trim before comparing: without this, "true " is silently read as off.
    return raw_flag.strip().lower() == "true"
def _get_agent_if_enabled(agent_name: str):
    """Look up ``agent_name`` in the agent registry when agent mode is on.

    Returns whatever ``get_agent`` yields for that name, or ``None`` when
    agent mode is disabled or when importing/querying the registry raises.
    A ``None`` result tells the caller to use the utility implementation.
    """
    agent = None
    if _use_agents():
        try:
            # Imported lazily so the registry is only loaded in agent mode.
            from services.agents.agent_registry import get_agent

            agent = get_agent(agent_name)
        except Exception as e:
            # Best-effort: report the problem and let the caller fall back.
            print(f"Warning: Agent system unavailable ({e}), using utility fallback")
    return agent
# ---------- Shared helpers ----------
def _base_state(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """Assemble the state dict handed to the agents and remote utilities.

    The dict carries the file's base name under ``filename``, a one-entry
    ``temp_files`` mapping from that base name to ``file_path``, and the
    requested ``start_page`` / ``end_page`` values unchanged.
    """
    name = os.path.basename(file_path)
    state: Dict[str, Any] = dict(filename=name, temp_files={name: file_path})
    state["start_page"] = start_page
    state["end_page"] = end_page
    return state
# ---------- Arg Schemas ----------
class FileSpanArgs(BaseModel):
    # Argument schema for tools that operate on a page range of a file.
    # file_path is required; both page bounds default to 1 and must be >= 1.
    file_path: str = Field(
        default=...,
        description="Absolute/local path to the uploaded file",
    )
    start_page: int = Field(
        default=1,
        ge=1,
        description="Start page (1-indexed)",
    )
    end_page: int = Field(
        default=1,
        ge=1,
        description="End page (inclusive, 1-indexed)",
    )
class TextOrFileArgs(BaseModel):
    # Argument schema for tools that accept raw text, a file on disk, or both.
    # At least one of the two sources must be supplied (checked after parsing).
    text: Optional[str] = Field(
        default=None,
        description="Raw text to process",
    )
    file_path: Optional[str] = Field(
        default=None,
        description="Path to a document on disk (PDF/Image)",
    )
    start_page: int = Field(
        default=1,
        ge=1,
        description="Start page (1-indexed)",
    )
    end_page: int = Field(
        default=1,
        ge=1,
        description="End page (inclusive, 1-indexed)",
    )

    @model_validator(mode="after")
    def validate_sources(self):
        """Reject input in which both ``text`` and ``file_path`` are empty."""
        if self.text or self.file_path:
            return self
        raise ValueError("Provide either text or file_path.")
class TranslateArgs(TextOrFileArgs):
    # Extends the text-or-file schema with the required translation target.
    target_lang: str = Field(
        default=...,
        description="Target language code or name (e.g., 'es' or 'Spanish')",
    )
class FinalizeArgs(BaseModel):
    # Argument schema for the finalize tool: one required JSON-like payload.
    content: Dict[str, Any] = Field(
        default=...,
        description="JSON payload to return directly to the user",
    )
# ---------- Tools ----------
@tool("extract_text", args_schema=FileSpanArgs)
def extract_text_tool(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Extract text from a document between start_page and end_page (inclusive).
    Use this when the user asks to read, analyze, or summarize document text.
    Returns: {"text": "..."}
    """
    # Use the registered agent when agent mode provides one; otherwise call
    # the remote utility. Both receive the same state dict.
    agent = _get_agent_if_enabled("extract_text")
    state = _base_state(file_path, start_page, end_page)
    payload = agent.run(state) if agent else extract_text_remote(state)
    # Accept either key name and normalise a missing/empty value to "".
    extracted = payload.get("text") or payload.get("extracted_text") or ""
    return {"text": extracted}
@tool("extract_tables", args_schema=FileSpanArgs)
def extract_tables_tool(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Extract tables from a document between start_page and end_page.
    Returns: {"tables": [...], "table_count": int}
    """
    # Use the registered agent when agent mode provides one; otherwise call
    # the remote utility. Both receive the same state dict.
    agent = _get_agent_if_enabled("extract_tables")
    state = _base_state(file_path, start_page, end_page)
    payload = agent.run(state) if agent else extract_tables_remote(state)
    # `.get("tables", [])` would still return None when the key is present
    # with a null value, and len(None) raises TypeError. Normalise any
    # missing/null value to an empty list so table_count is always valid.
    tables = payload.get("tables") or []
    return {"tables": tables, "table_count": len(tables)}
@tool("describe_images", args_schema=FileSpanArgs)
def describe_images_tool(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Generate captions/descriptions for images in the specified page range.
    Returns: {"image_descriptions": ...}
    """
    agent = _get_agent_if_enabled("describe_images")
    state = _base_state(file_path, start_page, end_page)
    if agent:
        described = agent.run(state)
    else:
        described = describe_images_remote(state)
    # When the expected key is absent, the whole result is passed through.
    return {"image_descriptions": described.get("image_descriptions", described)}
@tool("summarize_text", args_schema=TextOrFileArgs)
def summarize_text_tool(
    text: Optional[str] = None,
    file_path: Optional[str] = None,
    start_page: int = 1,
    end_page: int = 1,
) -> Dict[str, Any]:
    """
    Summarize either raw text or a document (by file_path + optional page span).
    Returns: {"summary": "..."}
    """
    # File-related keys are only added when a file path was supplied.
    file_state = _base_state(file_path, start_page, end_page) if file_path else {}
    state: Dict[str, Any] = {
        "text": text,
        "start_page": start_page,
        "end_page": end_page,
        **file_state,
    }
    agent = _get_agent_if_enabled("summarize")
    outcome = agent.run(state) if agent else summarize_remote(state)
    # When the expected key is absent, the whole result is passed through.
    return {"summary": outcome.get("summary", outcome)}
@tool("classify_text", args_schema=TextOrFileArgs)
def classify_text_tool(
    text: Optional[str] = None,
    file_path: Optional[str] = None,
    start_page: int = 1,
    end_page: int = 1,
) -> Dict[str, Any]:
    """
    Classify a text or document content.
    Returns: {"classification": ...}
    """
    # File-related keys are only added when a file path was supplied.
    file_state = _base_state(file_path, start_page, end_page) if file_path else {}
    state: Dict[str, Any] = {
        "text": text,
        "start_page": start_page,
        "end_page": end_page,
        **file_state,
    }
    agent = _get_agent_if_enabled("classify")
    if agent:
        verdict = agent.run(state)
    else:
        verdict = classify_remote(state)
    # When the expected key is absent, the whole result is passed through.
    return {"classification": verdict.get("classification", verdict)}
@tool("extract_entities", args_schema=TextOrFileArgs)
def extract_entities_tool(
    text: Optional[str] = None,
    file_path: Optional[str] = None,
    start_page: int = 1,
    end_page: int = 1,
) -> Dict[str, Any]:
    """
    Perform Named Entity Recognition (NER) on text or a document.
    Returns: {"ner": ...}
    """
    # File-related keys are only added when a file path was supplied.
    file_state = _base_state(file_path, start_page, end_page) if file_path else {}
    state: Dict[str, Any] = {
        "text": text,
        "start_page": start_page,
        "end_page": end_page,
        **file_state,
    }
    agent = _get_agent_if_enabled("ner")
    entities = agent.run(state) if agent else ner_remote(state)
    # When the expected key is absent, the whole result is passed through.
    return {"ner": entities.get("ner", entities)}
@tool("translate_text", args_schema=TranslateArgs)
def translate_text_tool(
    target_lang: str,
    text: Optional[str] = None,
    file_path: Optional[str] = None,
    start_page: int = 1,
    end_page: int = 1,
) -> Dict[str, Any]:
    """
    Translate text or a document to target_lang (e.g., 'es', 'fr', 'de', 'Spanish').
    Returns: {"translation": "...", "target_lang": "..."}
    """
    # File-related keys are only added when a file path was supplied.
    file_state = _base_state(file_path, start_page, end_page) if file_path else {}
    state: Dict[str, Any] = {
        "text": text,
        "start_page": start_page,
        "end_page": end_page,
        "target_lang": target_lang,
        **file_state,
    }
    agent = _get_agent_if_enabled("translate")
    translated = agent.run(state) if agent else translate_remote(state)
    # The requested language is echoed back alongside the translation; when
    # the expected key is absent, the whole result is passed through.
    response: Dict[str, Any] = {"translation": translated.get("translation", translated)}
    response["target_lang"] = target_lang
    return response
@tool("signature_verification", args_schema=FileSpanArgs)
def signature_verification_tool(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Verify signatures/stamps presence and authenticity indicators in specified page range.
    Returns: {"signature_verification": ...}
    """
    agent = _get_agent_if_enabled("signature_verification")
    state = _base_state(file_path, start_page, end_page)
    report = agent.run(state) if agent else signature_verification_remote(state)
    # When the expected key is absent, the whole result is passed through.
    return {"signature_verification": report.get("signature_verification", report)}
@tool("stamp_detection", args_schema=FileSpanArgs)
def stamp_detection_tool(file_path: str, start_page: int = 1, end_page: int = 1) -> Dict[str, Any]:
    """
    Detect stamps in a document in the specified page range.
    Returns: {"stamp_detection": ...}
    """
    agent = _get_agent_if_enabled("stamp_detection")
    state = _base_state(file_path, start_page, end_page)
    if agent:
        detection = agent.run(state)
    else:
        detection = stamp_detection_remote(state)
    # When the expected key is absent, the whole result is passed through.
    return {"stamp_detection": detection.get("stamp_detection", detection)}
@tool("finalize", return_direct=True, args_schema=FinalizeArgs)
def finalize_tool(content: Dict[str, Any]) -> Dict[str, Any]:
    """
    FINAL STEP ONLY. Call this at the end to return a concise JSON result to the UI.
    Whatever you pass in 'content' is returned directly and ends the run.
    """
    # Pure pass-through: the payload is handed back unchanged.
    final_payload = content
    return final_payload
def get_master_tools() -> List[Any]:
    """Return a new list of every tool defined in this module.

    The order is: extraction tools, text-processing tools, verification
    tools, and finally the finalize tool.
    """
    extraction = [extract_text_tool, extract_tables_tool, describe_images_tool]
    text_processing = [
        summarize_text_tool,
        classify_text_tool,
        extract_entities_tool,
        translate_text_tool,
    ]
    verification = [signature_verification_tool, stamp_detection_tool]
    return [*extraction, *text_processing, *verification, finalize_tool]