Spaces:
Running
Running
arterm-sedov
commited on
Commit
·
2fbcd2d
1
Parent(s):
dd70fb1
Fixed broken main
Browse files- .misc_files/misc_updates_rich_content_patch.diff +0 -0
- README.md +37 -18
- agent_ng/_tests/test_analyze_tools.py +88 -0
- agent_ng/app_ng_modular.py +5 -3
- agent_ng/langchain_memory.py +0 -15
- tools/file_utils.py +254 -3
- tools/tools.py +236 -23
.misc_files/misc_updates_rich_content_patch.diff
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
README.md
CHANGED
|
@@ -493,27 +493,46 @@ The codebase follows a clean modular design with clear separation of concerns:
|
|
| 493 |
|
| 494 |
### Tab Modules (`agent_ng/tabs/`)
|
| 495 |
|
| 496 |
-
- **`chat_tab.py`**: Main chat interface tab with quick
|
| 497 |
-
- **`logs_tab.py`**:
|
| 498 |
-
- **`stats_tab.py`**:
|
|
|
|
|
|
|
|
|
|
| 499 |
|
| 500 |
### Tool Modules (`tools/`)
|
| 501 |
|
| 502 |
-
- **`tools.py`**: Core tool functions and consolidated tool definitions with 20+ tools
|
| 503 |
-
-
|
| 504 |
-
-
|
| 505 |
-
-
|
| 506 |
-
-
|
| 507 |
-
-
|
| 508 |
-
-
|
| 509 |
-
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
-
|
| 513 |
-
-
|
| 514 |
-
-
|
| 515 |
-
|
| 516 |
-
- **`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
|
| 518 |
### Key Benefits
|
| 519 |
|
|
|
|
| 493 |
|
| 494 |
### Tab Modules (`agent_ng/tabs/`)
|
| 495 |
|
| 496 |
+
- **`chat_tab.py`**: Main chat interface tab with streaming responses, quick action buttons, file upload support, and full i18n support (English/Russian)
|
| 497 |
+
- **`logs_tab.py`**: Real-time debugging and logs tab with live updates, categorized log streams, and session-specific debug output
|
| 498 |
+
- **`stats_tab.py`**: Performance metrics and statistics dashboard with live monitoring, token usage tracking, and LLM provider analytics
|
| 499 |
+
- **`config_tab.py`**: Configuration and settings tab for LLM provider selection, language settings, and system parameters
|
| 500 |
+
- **`home_tab.py`**: Welcome and overview tab with quick start guides, feature highlights, and system status
|
| 501 |
+
- **`sidebar.py`**: Navigation sidebar component with tab switching, user session info, and quick access controls
|
| 502 |
|
| 503 |
### Tool Modules (`tools/`)
|
| 504 |
|
| 505 |
+
- **`tools.py`**: Core tool functions and consolidated tool definitions with 20+ specialized tools including:
|
| 506 |
+
- **Math Tools**: Basic arithmetic operations (add, subtract, multiply, divide, power, square root)
|
| 507 |
+
- **Web Search Tools**: Tavily web search, Wikipedia search, Arxiv academic papers, Exa AI deep research
|
| 508 |
+
- **File Analysis Tools**: Text file reading, CSV/Excel analysis with pandas, image analysis and OCR
|
| 509 |
+
- **Code Execution**: Multi-language code interpreter (Python, Bash, SQL, C, Java) with safety controls
|
| 510 |
+
- **Image Processing**: Image generation, transformation, drawing, and combination tools
|
| 511 |
+
- **Video/Audio Understanding**: Gemini-powered video and audio analysis with timestamp support
|
| 512 |
+
- **Data Processing**: Advanced pandas-based data analysis with query support and visualization
|
| 513 |
+
|
| 514 |
+
- **`applications_tools/`**: CMW Platform application and template management
|
| 515 |
+
- `tool_list_applications.py`: List and manage platform applications
|
| 516 |
+
- `tool_list_templates.py`: List application templates and their configurations
|
| 517 |
+
- `tool_platform_entity_url.py`: Generate direct URLs to platform entities
|
| 518 |
+
|
| 519 |
+
- **`attributes_tools/`**: Comprehensive attribute management for all CMW Platform attribute types
|
| 520 |
+
- **Core Attribute Types**: Text, Boolean, DateTime, Decimal/Numeric, Document, Drawing, Duration, Image, Record, Role, Account, Enum
|
| 521 |
+
- **Management Operations**: Create, edit, delete, archive/unarchive, and retrieve attributes
|
| 522 |
+
- **Specialized Tools**: Each attribute type has dedicated creation and management tools
|
| 523 |
+
- **Utility Functions**: Common attribute operations and validation helpers
|
| 524 |
+
|
| 525 |
+
- **`templates_tools/`**: Template-related operations and management
|
| 526 |
+
- `tool_list_attributes.py`: List and analyze template attributes
|
| 527 |
+
- `tools_record_template.py`: Create and manage record templates
|
| 528 |
+
- Template configuration and relationship management
|
| 529 |
+
|
| 530 |
+
- **Utility Modules**:
|
| 531 |
+
- **`tool_utils.py`**: Common tool utilities, validation, and helper functions
|
| 532 |
+
- **`models.py`**: Pydantic data models and schemas for tool operations
|
| 533 |
+
- **`requests_.py`**: HTTP request utilities with retry logic and error handling
|
| 534 |
+
- **`file_utils.py`**: Secure file handling utilities with session isolation and MIME detection
|
| 535 |
+
- **`pdf_utils.py`**: PDF processing utilities with OCR support and text extraction
|
| 536 |
|
| 537 |
### Key Benefits
|
| 538 |
|
agent_ng/_tests/test_analyze_tools.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import pytest
|
| 6 |
+
|
| 7 |
+
# Ensure project root is on sys.path to import tools
|
| 8 |
+
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
| 9 |
+
if str(PROJECT_ROOT) not in sys.path:
|
| 10 |
+
sys.path.insert(0, str(PROJECT_ROOT))
|
| 11 |
+
|
| 12 |
+
import tools.tools as t # noqa: E402
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def write_csv(tmp_path: Path) -> Path:
|
| 16 |
+
df = pd.DataFrame({
|
| 17 |
+
"A": [1, 2, 3, 4],
|
| 18 |
+
"B": [0.5, 1.5, 2.5, 3.5],
|
| 19 |
+
"C": ["x", "y", "x", "z"],
|
| 20 |
+
})
|
| 21 |
+
p = tmp_path / "sample.csv"
|
| 22 |
+
df.to_csv(p, index=False)
|
| 23 |
+
return p
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def write_excel(tmp_path: Path) -> Path:
|
| 27 |
+
df = pd.DataFrame({
|
| 28 |
+
"A": [10, 20, 30, 40],
|
| 29 |
+
"B": [5, 15, 25, 35],
|
| 30 |
+
"C": ["u", "v", "u", "w"],
|
| 31 |
+
})
|
| 32 |
+
p = tmp_path / "sample.xlsx"
|
| 33 |
+
try:
|
| 34 |
+
df.to_excel(p, index=False)
|
| 35 |
+
except Exception as e: # pragma: no cover
|
| 36 |
+
pytest.skip(f"Excel engine not available: {e}")
|
| 37 |
+
return p
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def parse_tool_response(s: str):
|
| 41 |
+
return json.loads(s)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def test_helper_empty_query_preview():
|
| 45 |
+
df = pd.DataFrame({
|
| 46 |
+
"A": [1, 2, 3, 4],
|
| 47 |
+
"B": [0.5, 1.5, 2.5, 3.5],
|
| 48 |
+
"C": ["x", "y", "x", "z"],
|
| 49 |
+
})
|
| 50 |
+
_, payload = t._apply_pandas_query(df, query=None, preview_opts=None, plot_opts=None)
|
| 51 |
+
assert payload.get("table_markdown")
|
| 52 |
+
assert payload.get("schema")
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def test_helper_expr_query():
|
| 56 |
+
df = pd.DataFrame({
|
| 57 |
+
"A": [1, 2, 3, 4],
|
| 58 |
+
"B": [0.5, 1.5, 2.5, 3.5],
|
| 59 |
+
"C": ["x", "y", "x", "z"],
|
| 60 |
+
})
|
| 61 |
+
_, payload = t._apply_pandas_query(df, query="expr: B > 1.0", preview_opts=None, plot_opts=None)
|
| 62 |
+
assert payload.get("table_markdown")
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def test_helper_pipeline_query():
|
| 66 |
+
df = pd.DataFrame({
|
| 67 |
+
"A": [1, 2, 3, 4],
|
| 68 |
+
"B": [0.5, 1.5, 2.5, 3.5],
|
| 69 |
+
"C": ["x", "y", "x", "z"],
|
| 70 |
+
})
|
| 71 |
+
pipeline = json.dumps([
|
| 72 |
+
{"op": "query", "expr": "B > 1.0"},
|
| 73 |
+
{"op": "head", "n": 2},
|
| 74 |
+
])
|
| 75 |
+
_, payload = t._apply_pandas_query(df, query=pipeline, preview_opts=None, plot_opts=None)
|
| 76 |
+
assert payload.get("table_markdown")
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def test_helper_preview_includes_shape_and_schema():
|
| 80 |
+
df = pd.DataFrame({
|
| 81 |
+
"A": [1, 2, 3, 4],
|
| 82 |
+
"B": [0.5, 1.5, 2.5, 3.5],
|
| 83 |
+
})
|
| 84 |
+
_, payload = t._apply_pandas_query(df, query=None, preview_opts=None, plot_opts=None)
|
| 85 |
+
assert "shape" in payload and isinstance(payload["shape"], tuple)
|
| 86 |
+
assert "schema" in payload and isinstance(payload["schema"], dict)
|
| 87 |
+
|
| 88 |
+
|
agent_ng/app_ng_modular.py
CHANGED
|
@@ -628,11 +628,13 @@ class NextGenApp:
|
|
| 628 |
and user_agent.llm_instance
|
| 629 |
):
|
| 630 |
llm_info = user_agent.get_llm_info()
|
| 631 |
-
|
| 632 |
-
|
|
|
|
| 633 |
)
|
| 634 |
else:
|
| 635 |
-
|
|
|
|
| 636 |
|
| 637 |
# Use session-specific debug streamer
|
| 638 |
session_debug = get_debug_streamer(session_id)
|
|
|
|
| 628 |
and user_agent.llm_instance
|
| 629 |
):
|
| 630 |
llm_info = user_agent.get_llm_info()
|
| 631 |
+
session_debug = get_debug_streamer(session_id)
|
| 632 |
+
session_debug.debug(
|
| 633 |
+
f"Using session agent with LLM: {llm_info.get('provider', 'unknown')}/{llm_info.get('model_name', 'unknown')}"
|
| 634 |
)
|
| 635 |
else:
|
| 636 |
+
session_debug = get_debug_streamer(session_id)
|
| 637 |
+
session_debug.warning("Session agent has no LLM instance!")
|
| 638 |
|
| 639 |
# Use session-specific debug streamer
|
| 640 |
session_debug = get_debug_streamer(session_id)
|
agent_ng/langchain_memory.py
CHANGED
|
@@ -324,9 +324,6 @@ class LangChainConversationChain:
|
|
| 324 |
if not system_in_history:
|
| 325 |
# Store system message in memory only once
|
| 326 |
self.memory_manager.add_message(conversation_id, system_message)
|
| 327 |
-
print("🔍 DEBUG: Added system message to memory (first time)")
|
| 328 |
-
else:
|
| 329 |
-
print("🔍 DEBUG: System message already in memory, skipping storage")
|
| 330 |
|
| 331 |
# Add conversation history (excluding system messages to avoid duplication)
|
| 332 |
non_system_history = [msg for msg in chat_history if not isinstance(msg, SystemMessage)]
|
|
@@ -377,37 +374,25 @@ class LangChainConversationChain:
|
|
| 377 |
if tool_key in duplicate_counts:
|
| 378 |
# Increment count for duplicate
|
| 379 |
duplicate_counts[tool_key] += 1
|
| 380 |
-
print(f"🔍 DEBUG: Found duplicate tool call {tool_name} (total count: {duplicate_counts[tool_key]})")
|
| 381 |
else:
|
| 382 |
# First occurrence - add to unique list and initialize count
|
| 383 |
unique_tool_calls.append(tool_call)
|
| 384 |
duplicate_counts[tool_key] = 1
|
| 385 |
-
print(f"🔍 DEBUG: Added unique tool call {tool_name}")
|
| 386 |
|
| 387 |
return unique_tool_calls, duplicate_counts
|
| 388 |
|
| 389 |
def _track_token_usage(self, response, messages, conversation_id: str = "default"):
|
| 390 |
"""Track token usage for LLM response"""
|
| 391 |
try:
|
| 392 |
-
print(f"🔍 DEBUG: _track_token_usage called with response type: {type(response)}")
|
| 393 |
-
print(f"🔍 DEBUG: Has agent: {hasattr(self, 'agent')}")
|
| 394 |
-
if hasattr(self, 'agent'):
|
| 395 |
-
print(f"🔍 DEBUG: Agent is not None: {self.agent is not None}")
|
| 396 |
-
if self.agent:
|
| 397 |
-
print(f"🔍 DEBUG: Agent has token_tracker: {hasattr(self.agent, 'token_tracker')}")
|
| 398 |
-
|
| 399 |
# Get token tracker from the agent
|
| 400 |
if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'token_tracker'):
|
| 401 |
-
print("🔍 DEBUG: Using agent's token tracker")
|
| 402 |
self.agent.token_tracker.track_llm_response(response, messages)
|
| 403 |
else:
|
| 404 |
-
print("🔍 DEBUG: Creating new token tracker")
|
| 405 |
# Create a simple token tracker if none exists
|
| 406 |
from .token_counter import get_token_tracker
|
| 407 |
token_tracker = get_token_tracker(conversation_id)
|
| 408 |
token_tracker.track_llm_response(response, messages)
|
| 409 |
except Exception as e:
|
| 410 |
-
print(f"🔍 DEBUG: Token tracking error: {e}")
|
| 411 |
# Silently fail - token counting is not critical
|
| 412 |
pass
|
| 413 |
|
|
|
|
| 324 |
if not system_in_history:
|
| 325 |
# Store system message in memory only once
|
| 326 |
self.memory_manager.add_message(conversation_id, system_message)
|
|
|
|
|
|
|
|
|
|
| 327 |
|
| 328 |
# Add conversation history (excluding system messages to avoid duplication)
|
| 329 |
non_system_history = [msg for msg in chat_history if not isinstance(msg, SystemMessage)]
|
|
|
|
| 374 |
if tool_key in duplicate_counts:
|
| 375 |
# Increment count for duplicate
|
| 376 |
duplicate_counts[tool_key] += 1
|
|
|
|
| 377 |
else:
|
| 378 |
# First occurrence - add to unique list and initialize count
|
| 379 |
unique_tool_calls.append(tool_call)
|
| 380 |
duplicate_counts[tool_key] = 1
|
|
|
|
| 381 |
|
| 382 |
return unique_tool_calls, duplicate_counts
|
| 383 |
|
| 384 |
def _track_token_usage(self, response, messages, conversation_id: str = "default"):
|
| 385 |
"""Track token usage for LLM response"""
|
| 386 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
# Get token tracker from the agent
|
| 388 |
if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'token_tracker'):
|
|
|
|
| 389 |
self.agent.token_tracker.track_llm_response(response, messages)
|
| 390 |
else:
|
|
|
|
| 391 |
# Create a simple token tracker if none exists
|
| 392 |
from .token_counter import get_token_tracker
|
| 393 |
token_tracker = get_token_tracker(conversation_id)
|
| 394 |
token_tracker.track_llm_response(response, messages)
|
| 395 |
except Exception as e:
|
|
|
|
| 396 |
# Silently fail - token counting is not critical
|
| 397 |
pass
|
| 398 |
|
tools/file_utils.py
CHANGED
|
@@ -48,6 +48,7 @@ class ToolResponse(BaseModel):
|
|
| 48 |
result: Optional[str] = Field(None, description="Tool result content")
|
| 49 |
error: Optional[str] = Field(None, description="Error message if tool failed")
|
| 50 |
file_info: Optional[FileInfo] = Field(None, description="File information if applicable")
|
|
|
|
| 51 |
|
| 52 |
class FileUtils:
|
| 53 |
"""Utility class for common file operations."""
|
|
@@ -167,7 +168,7 @@ class FileUtils:
|
|
| 167 |
|
| 168 |
@staticmethod
|
| 169 |
def create_tool_response(tool_name: str, result: str = None, error: str = None,
|
| 170 |
-
file_info: FileInfo = None) -> str:
|
| 171 |
"""Create standardized tool response JSON with Pydantic validation."""
|
| 172 |
# Sanitize file_info to remove full paths
|
| 173 |
if file_info:
|
|
@@ -187,7 +188,8 @@ class FileUtils:
|
|
| 187 |
tool_name=tool_name,
|
| 188 |
result=result, # Full result, no truncation
|
| 189 |
error=error,
|
| 190 |
-
file_info=sanitized_file_info
|
|
|
|
| 191 |
)
|
| 192 |
|
| 193 |
return response.model_dump_json(indent=2)
|
|
@@ -583,4 +585,253 @@ class FileUtils:
|
|
| 583 |
@staticmethod
|
| 584 |
def is_pdf_file(file_path: str) -> bool:
|
| 585 |
"""Check if file is likely a PDF file based on extension."""
|
| 586 |
-
return Path(file_path).suffix.lower() == '.pdf'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
result: Optional[str] = Field(None, description="Tool result content")
|
| 49 |
error: Optional[str] = Field(None, description="Error message if tool failed")
|
| 50 |
file_info: Optional[FileInfo] = Field(None, description="File information if applicable")
|
| 51 |
+
extra: Optional[Dict[str, Any]] = Field(None, description="Optional structured payload for tool-specific data")
|
| 52 |
|
| 53 |
class FileUtils:
|
| 54 |
"""Utility class for common file operations."""
|
|
|
|
| 168 |
|
| 169 |
@staticmethod
|
| 170 |
def create_tool_response(tool_name: str, result: str = None, error: str = None,
|
| 171 |
+
file_info: FileInfo = None, extra: Dict[str, Any] = None) -> str:
|
| 172 |
"""Create standardized tool response JSON with Pydantic validation."""
|
| 173 |
# Sanitize file_info to remove full paths
|
| 174 |
if file_info:
|
|
|
|
| 188 |
tool_name=tool_name,
|
| 189 |
result=result, # Full result, no truncation
|
| 190 |
error=error,
|
| 191 |
+
file_info=sanitized_file_info,
|
| 192 |
+
extra=extra
|
| 193 |
)
|
| 194 |
|
| 195 |
return response.model_dump_json(indent=2)
|
|
|
|
| 585 |
@staticmethod
|
| 586 |
def is_pdf_file(file_path: str) -> bool:
|
| 587 |
"""Check if file is likely a PDF file based on extension."""
|
| 588 |
+
return Path(file_path).suffix.lower() == '.pdf'
|
| 589 |
+
|
| 590 |
+
@staticmethod
|
| 591 |
+
def get_mime_type(file_path: str) -> str:
|
| 592 |
+
"""Get MIME type for a file based on extension and content."""
|
| 593 |
+
import mimetypes
|
| 594 |
+
|
| 595 |
+
mime_type, _ = mimetypes.guess_type(file_path)
|
| 596 |
+
if mime_type:
|
| 597 |
+
return mime_type
|
| 598 |
+
|
| 599 |
+
ext = Path(file_path).suffix.lower()
|
| 600 |
+
mime_map = {
|
| 601 |
+
'.png': 'image/png',
|
| 602 |
+
'.jpg': 'image/jpeg',
|
| 603 |
+
'.jpeg': 'image/jpeg',
|
| 604 |
+
'.gif': 'image/gif',
|
| 605 |
+
'.webp': 'image/webp',
|
| 606 |
+
'.svg': 'image/svg+xml',
|
| 607 |
+
'.tiff': 'image/tiff',
|
| 608 |
+
'.bmp': 'image/bmp',
|
| 609 |
+
'.mp4': 'video/mp4',
|
| 610 |
+
'.webm': 'video/webm',
|
| 611 |
+
'.avi': 'video/x-msvideo',
|
| 612 |
+
'.mov': 'video/quicktime',
|
| 613 |
+
'.wav': 'audio/wav',
|
| 614 |
+
'.mp3': 'audio/mpeg',
|
| 615 |
+
'.ogg': 'audio/ogg',
|
| 616 |
+
'.flac': 'audio/flac',
|
| 617 |
+
'.aac': 'audio/aac',
|
| 618 |
+
'.m4a': 'audio/mp4',
|
| 619 |
+
'.html': 'text/html',
|
| 620 |
+
'.htm': 'text/html',
|
| 621 |
+
'.json': 'application/json',
|
| 622 |
+
'.xml': 'application/xml',
|
| 623 |
+
'.pdf': 'application/pdf'
|
| 624 |
+
}
|
| 625 |
+
|
| 626 |
+
return mime_map.get(ext, 'application/octet-stream')
|
| 627 |
+
|
| 628 |
+
@staticmethod
|
| 629 |
+
def detect_media_type(file_path: str) -> str:
|
| 630 |
+
"""Detect media type category for a file."""
|
| 631 |
+
if FileUtils.is_image_file(file_path):
|
| 632 |
+
return 'image'
|
| 633 |
+
elif FileUtils.is_video_file(file_path):
|
| 634 |
+
return 'video'
|
| 635 |
+
elif FileUtils.is_audio_file(file_path):
|
| 636 |
+
return 'audio'
|
| 637 |
+
elif Path(file_path).suffix.lower() == '.html':
|
| 638 |
+
return 'html'
|
| 639 |
+
elif Path(file_path).suffix.lower() in ['.png', '.svg'] and 'plot' in file_path.lower():
|
| 640 |
+
return 'plot'
|
| 641 |
+
else:
|
| 642 |
+
return 'unknown'
|
| 643 |
+
|
| 644 |
+
@staticmethod
|
| 645 |
+
def create_media_attachment(file_path: str, caption: str = None, metadata: Dict[str, Any] = None) -> Dict[str, Any]:
|
| 646 |
+
"""
|
| 647 |
+
Create a media attachment dictionary for rich content.
|
| 648 |
+
|
| 649 |
+
Args:
|
| 650 |
+
file_path: Path to the media file
|
| 651 |
+
caption: Optional caption for the media
|
| 652 |
+
metadata: Optional metadata dictionary
|
| 653 |
+
|
| 654 |
+
Returns:
|
| 655 |
+
Dict with media attachment information
|
| 656 |
+
"""
|
| 657 |
+
if not FileUtils.file_exists(file_path):
|
| 658 |
+
return {
|
| 659 |
+
"type": "error",
|
| 660 |
+
"error": f"File not found: {file_path}"
|
| 661 |
+
}
|
| 662 |
+
|
| 663 |
+
file_info = FileUtils.get_file_info(file_path)
|
| 664 |
+
media_type = FileUtils.detect_media_type(file_path)
|
| 665 |
+
mime_type = FileUtils.get_mime_type(file_path)
|
| 666 |
+
|
| 667 |
+
attachment = {
|
| 668 |
+
"type": "media_attachment",
|
| 669 |
+
"media_type": media_type,
|
| 670 |
+
"file_path": file_path,
|
| 671 |
+
"mime_type": mime_type,
|
| 672 |
+
"file_info": file_info.dict() if file_info else None
|
| 673 |
+
}
|
| 674 |
+
|
| 675 |
+
if caption:
|
| 676 |
+
attachment["caption"] = caption
|
| 677 |
+
|
| 678 |
+
if metadata:
|
| 679 |
+
attachment["metadata"] = metadata
|
| 680 |
+
|
| 681 |
+
return attachment
|
| 682 |
+
|
| 683 |
+
@staticmethod
|
| 684 |
+
def add_media_to_response(tool_response: Dict[str, Any], file_path: str,
|
| 685 |
+
caption: str = None, metadata: Dict[str, Any] = None) -> Dict[str, Any]:
|
| 686 |
+
"""
|
| 687 |
+
Add media attachment to an existing tool response.
|
| 688 |
+
|
| 689 |
+
Args:
|
| 690 |
+
tool_response: Existing tool response dictionary
|
| 691 |
+
file_path: Path to the media file
|
| 692 |
+
caption: Optional caption for the media
|
| 693 |
+
metadata: Optional metadata dictionary
|
| 694 |
+
|
| 695 |
+
Returns:
|
| 696 |
+
Updated tool response with media attachment
|
| 697 |
+
"""
|
| 698 |
+
if "media_attachments" not in tool_response:
|
| 699 |
+
tool_response["media_attachments"] = []
|
| 700 |
+
|
| 701 |
+
media_attachment = FileUtils.create_media_attachment(file_path, caption, metadata)
|
| 702 |
+
tool_response["media_attachments"].append(media_attachment)
|
| 703 |
+
|
| 704 |
+
return tool_response
|
| 705 |
+
|
| 706 |
+
@staticmethod
|
| 707 |
+
def extract_media_from_response(tool_response: Dict[str, Any]) -> List[Dict[str, Any]]:
|
| 708 |
+
"""
|
| 709 |
+
Extract media attachments from a tool response.
|
| 710 |
+
|
| 711 |
+
Args:
|
| 712 |
+
tool_response: Tool response dictionary
|
| 713 |
+
|
| 714 |
+
Returns:
|
| 715 |
+
List of media attachment dictionaries
|
| 716 |
+
"""
|
| 717 |
+
media_attachments = []
|
| 718 |
+
|
| 719 |
+
if "media_attachments" in tool_response:
|
| 720 |
+
media_attachments.extend(tool_response["media_attachments"])
|
| 721 |
+
|
| 722 |
+
if "result" in tool_response and isinstance(tool_response["result"], dict):
|
| 723 |
+
result = tool_response["result"]
|
| 724 |
+
for key, value in result.items():
|
| 725 |
+
if isinstance(value, str) and FileUtils.file_exists(value):
|
| 726 |
+
media_attachment = FileUtils.create_media_attachment(value, f"File: {key}")
|
| 727 |
+
media_attachments.append(media_attachment)
|
| 728 |
+
|
| 729 |
+
return media_attachments
|
| 730 |
+
|
| 731 |
+
@staticmethod
|
| 732 |
+
def is_base64_image(data: str) -> bool:
|
| 733 |
+
"""Check if string contains base64 image data."""
|
| 734 |
+
import base64
|
| 735 |
+
|
| 736 |
+
if data.startswith('data:image/'):
|
| 737 |
+
return True
|
| 738 |
+
|
| 739 |
+
if len(data) > 100:
|
| 740 |
+
try:
|
| 741 |
+
clean_data = ''.join(data.split())
|
| 742 |
+
decoded = base64.b64decode(clean_data)
|
| 743 |
+
image_magic = [
|
| 744 |
+
b'\x89PNG\r\n\x1a\n',
|
| 745 |
+
b'\xff\xd8\xff',
|
| 746 |
+
b'GIF87a',
|
| 747 |
+
b'GIF89a',
|
| 748 |
+
b'RIFF',
|
| 749 |
+
b'BM'
|
| 750 |
+
]
|
| 751 |
+
return any(decoded.startswith(magic) for magic in image_magic)
|
| 752 |
+
except:
|
| 753 |
+
return False
|
| 754 |
+
|
| 755 |
+
return False
|
| 756 |
+
|
| 757 |
+
@staticmethod
|
| 758 |
+
def save_base64_to_file(base64_data: str, output_path: str = None,
|
| 759 |
+
file_extension: str = None, session_id: str = None) -> str:
|
| 760 |
+
"""
|
| 761 |
+
Save base64 data to a file.
|
| 762 |
+
|
| 763 |
+
Args:
|
| 764 |
+
base64_data: Base64 encoded data (with or without data URI prefix)
|
| 765 |
+
output_path: Optional output file path
|
| 766 |
+
file_extension: Optional file extension for temp file
|
| 767 |
+
session_id: Optional session ID to save in session-isolated directory
|
| 768 |
+
|
| 769 |
+
Returns:
|
| 770 |
+
Path to the saved file
|
| 771 |
+
"""
|
| 772 |
+
import base64
|
| 773 |
+
import tempfile
|
| 774 |
+
import uuid
|
| 775 |
+
import mimetypes
|
| 776 |
+
from datetime import datetime
|
| 777 |
+
|
| 778 |
+
if base64_data.startswith('data:'):
|
| 779 |
+
header, data = base64_data.split(',', 1)
|
| 780 |
+
mime_type = header.split(':')[1].split(';')[0]
|
| 781 |
+
if not file_extension:
|
| 782 |
+
file_extension = mimetypes.guess_extension(mime_type) or '.bin'
|
| 783 |
+
else:
|
| 784 |
+
data = base64_data
|
| 785 |
+
if not file_extension:
|
| 786 |
+
file_extension = '.bin'
|
| 787 |
+
|
| 788 |
+
if not output_path:
|
| 789 |
+
if session_id:
|
| 790 |
+
session_dir = Path(f".gradio/sessions/{session_id}")
|
| 791 |
+
session_dir.mkdir(parents=True, exist_ok=True)
|
| 792 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 793 |
+
unique_id = str(uuid.uuid4())[:8]
|
| 794 |
+
filename = f"llm_image_{timestamp}_{unique_id}{file_extension}"
|
| 795 |
+
output_path = str(session_dir / filename)
|
| 796 |
+
else:
|
| 797 |
+
temp_fd, output_path = tempfile.mkstemp(suffix=file_extension)
|
| 798 |
+
os.close(temp_fd)
|
| 799 |
+
|
| 800 |
+
decoded_data = base64.b64decode(data)
|
| 801 |
+
with open(output_path, 'wb') as f:
|
| 802 |
+
f.write(decoded_data)
|
| 803 |
+
return output_path
|
| 804 |
+
|
| 805 |
+
@staticmethod
|
| 806 |
+
def create_gallery_attachment(image_paths: List[str], captions: List[str] = None) -> Dict[str, Any]:
|
| 807 |
+
"""
|
| 808 |
+
Create a gallery attachment for multiple images.
|
| 809 |
+
|
| 810 |
+
Args:
|
| 811 |
+
image_paths: List of image file paths
|
| 812 |
+
captions: Optional list of captions for each image
|
| 813 |
+
|
| 814 |
+
Returns:
|
| 815 |
+
Gallery attachment dictionary
|
| 816 |
+
"""
|
| 817 |
+
if not image_paths:
|
| 818 |
+
return {"type": "error", "error": "No image paths provided"}
|
| 819 |
+
|
| 820 |
+
valid_images = []
|
| 821 |
+
for i, path in enumerate(image_paths):
|
| 822 |
+
if FileUtils.file_exists(path) and FileUtils.is_image_file(path):
|
| 823 |
+
image_info = {
|
| 824 |
+
"path": path,
|
| 825 |
+
"caption": captions[i] if captions and i < len(captions) else None
|
| 826 |
+
}
|
| 827 |
+
valid_images.append(image_info)
|
| 828 |
+
|
| 829 |
+
if not valid_images:
|
| 830 |
+
return {"type": "error", "error": "No valid image files found"}
|
| 831 |
+
|
| 832 |
+
return {
|
| 833 |
+
"type": "gallery_attachment",
|
| 834 |
+
"media_type": "gallery",
|
| 835 |
+
"images": valid_images,
|
| 836 |
+
"count": len(valid_images)
|
| 837 |
+
}
|
tools/tools.py
CHANGED
|
@@ -898,6 +898,182 @@ def extract_text_from_image(file_reference: str, agent=None) -> str:
|
|
| 898 |
except Exception as e:
|
| 899 |
return FileUtils.create_tool_response("extract_text_from_image", error=f"Error extracting text from image: {str(e)}")
|
| 900 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 901 |
@tool
|
| 902 |
def analyze_csv_file(file_reference: str, query: str, agent=None) -> str:
|
| 903 |
"""
|
|
@@ -930,12 +1106,28 @@ def analyze_csv_file(file_reference: str, query: str, agent=None) -> str:
|
|
| 930 |
return FileUtils.create_tool_response("analyze_csv_file", error=file_info.error)
|
| 931 |
try:
|
| 932 |
df = pd.read_csv(file_path)
|
| 933 |
-
|
| 934 |
-
|
| 935 |
-
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 939 |
except Exception as e:
|
| 940 |
return FileUtils.create_tool_response("analyze_csv_file", error=f"Error analyzing CSV file: {str(e)}")
|
| 941 |
|
|
@@ -971,12 +1163,28 @@ def analyze_excel_file(file_reference: str, query: str, agent=None) -> str:
|
|
| 971 |
return FileUtils.create_tool_response("analyze_excel_file", error=file_info.error)
|
| 972 |
try:
|
| 973 |
df = pd.read_excel(file_path)
|
| 974 |
-
|
| 975 |
-
|
| 976 |
-
|
| 977 |
-
|
| 978 |
-
|
| 979 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 980 |
except Exception as e:
|
| 981 |
# Enhanced error reporting: print columns and head if possible
|
| 982 |
try:
|
|
@@ -1038,7 +1246,7 @@ def analyze_image(file_reference: str, agent=None) -> str:
|
|
| 1038 |
"color_analysis": color_analysis,
|
| 1039 |
"thumbnail": thumbnail_base64,
|
| 1040 |
}
|
| 1041 |
-
return FileUtils.create_tool_response("analyze_image", result=result)
|
| 1042 |
except Exception as e:
|
| 1043 |
return FileUtils.create_tool_response("analyze_image", error=str(e))
|
| 1044 |
|
|
@@ -1205,17 +1413,23 @@ def draw_on_image(image_base64: str, drawing_type: str, params: DrawOnImageParam
|
|
| 1205 |
}, indent=2)
|
| 1206 |
|
| 1207 |
class GenerateSimpleImageParams(BaseModel):
|
|
|
|
|
|
|
|
|
|
| 1208 |
color: Optional[str] = Field(None, description="Solid color for 'solid' type (e.g., 'red', 'blue') or RGB string (e.g., '255,0,0')")
|
| 1209 |
start_color: Optional[List[int]] = Field(None, description="Gradient start color [r, g, b]")
|
| 1210 |
end_color: Optional[List[int]] = Field(None, description="Gradient end color [r, g, b]")
|
| 1211 |
-
direction: Optional[Literal["horizontal", "vertical"]] = Field(None, description="Gradient direction")
|
| 1212 |
square_size: Optional[int] = Field(None, description="Square size for checkerboard")
|
| 1213 |
color1: Optional[str] = Field(None, description="First color for checkerboard")
|
| 1214 |
color2: Optional[str] = Field(None, description="Second color for checkerboard")
|
| 1215 |
|
| 1216 |
@tool(args_schema=GenerateSimpleImageParams)
|
| 1217 |
def generate_simple_image(image_type: str, width: int = 500, height: int = 500,
|
| 1218 |
-
|
|
|
|
|
|
|
|
|
|
| 1219 |
"""
|
| 1220 |
Generate simple images like gradients, solid colors, checkerboard, or noise patterns.
|
| 1221 |
|
|
@@ -1229,9 +1443,8 @@ def generate_simple_image(image_type: str, width: int = 500, height: int = 500,
|
|
| 1229 |
str: JSON string with the generated image as base64 or error message.
|
| 1230 |
"""
|
| 1231 |
try:
|
| 1232 |
-
params = params or {}
|
| 1233 |
if image_type == "solid":
|
| 1234 |
-
color_str =
|
| 1235 |
# Parse color string to RGB tuple
|
| 1236 |
if "," in color_str and color_str.replace(",", "").replace(" ", "").isdigit():
|
| 1237 |
try:
|
|
@@ -1250,9 +1463,9 @@ def generate_simple_image(image_type: str, width: int = 500, height: int = 500,
|
|
| 1250 |
color = (255, 255, 255)
|
| 1251 |
img = Image.new("RGB", (width, height), color)
|
| 1252 |
elif image_type == "gradient":
|
| 1253 |
-
start_color =
|
| 1254 |
-
end_color =
|
| 1255 |
-
direction =
|
| 1256 |
img = Image.new("RGB", (width, height))
|
| 1257 |
draw = ImageDraw.Draw(img)
|
| 1258 |
if direction == "horizontal":
|
|
@@ -1271,9 +1484,9 @@ def generate_simple_image(image_type: str, width: int = 500, height: int = 500,
|
|
| 1271 |
noise_array = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)
|
| 1272 |
img = Image.fromarray(noise_array, "RGB")
|
| 1273 |
elif image_type == "checkerboard":
|
| 1274 |
-
square_size =
|
| 1275 |
-
color1 =
|
| 1276 |
-
color2 =
|
| 1277 |
img = Image.new("RGB", (width, height))
|
| 1278 |
for y in range(0, height, square_size):
|
| 1279 |
for x in range(0, width, square_size):
|
|
|
|
| 898 |
except Exception as e:
|
| 899 |
return FileUtils.create_tool_response("extract_text_from_image", error=f"Error extracting text from image: {str(e)}")
|
| 900 |
|
| 901 |
+
# ========== PANDAS QUERY/PIPELINE HELPERS ==========
|
| 902 |
+
def _safe_to_markdown(df: pd.DataFrame, max_rows: int = 10, max_cols: int = 20) -> str:
|
| 903 |
+
preview_df = df.head(max_rows)
|
| 904 |
+
if max_cols is not None:
|
| 905 |
+
preview_df = preview_df.iloc[:, :max_cols]
|
| 906 |
+
try:
|
| 907 |
+
return preview_df.to_markdown(index=False)
|
| 908 |
+
except Exception:
|
| 909 |
+
return preview_df.to_string(index=False)
|
| 910 |
+
|
| 911 |
+
|
| 912 |
+
def _dataframe_schema(df: pd.DataFrame) -> Dict[str, str]:
|
| 913 |
+
return {str(col): str(dtype) for col, dtype in df.dtypes.items()}
|
| 914 |
+
|
| 915 |
+
|
| 916 |
+
def _truncate_records(df: pd.DataFrame, max_rows: int = 100, max_cols: int = 50, max_cell_chars: int = 500) -> List[Dict[str, Any]]:
|
| 917 |
+
limited = df.head(max_rows)
|
| 918 |
+
if max_cols is not None:
|
| 919 |
+
limited = limited.iloc[:, :max_cols]
|
| 920 |
+
def _truncate_val(v: Any) -> Any:
|
| 921 |
+
try:
|
| 922 |
+
s = str(v)
|
| 923 |
+
except Exception:
|
| 924 |
+
return v
|
| 925 |
+
if len(s) > max_cell_chars:
|
| 926 |
+
return s[: max_cell_chars - 1] + "…"
|
| 927 |
+
return v
|
| 928 |
+
return [{k: _truncate_val(v) for k, v in row.items()} for row in limited.to_dict(orient="records")]
|
| 929 |
+
|
| 930 |
+
|
| 931 |
+
# Whitelist of pipeline operations accepted by _dispatch_pipeline.
# "df_method" ops are forwarded to the same-named pandas DataFrame method
# with the step's remaining keys passed as keyword arguments; "special"
# ops get bespoke handling (currently only groupby).
_ALLOWED_OPS: Dict[str, Literal["df_method", "special"]] = {
    "query": "df_method",
    "assign": "df_method",
    "rename": "df_method",
    "drop": "df_method",
    "dropna": "df_method",
    "fillna": "df_method",
    "astype": "df_method",
    "sort_values": "df_method",
    "head": "df_method",
    "tail": "df_method",
    "sample": "df_method",
    "value_counts": "df_method",
    "nlargest": "df_method",
    "nsmallest": "df_method",
    "reset_index": "df_method",
    "set_index": "df_method",
    "pivot_table": "df_method",
    "melt": "df_method",
    "stack": "df_method",
    "unstack": "df_method",
    "groupby": "special",  # requires an 'agg' mapping or size=true in the step
}
|
| 954 |
+
|
| 955 |
+
|
| 956 |
+
def _coerce_tabular(obj: Any, step_name: str) -> pd.DataFrame:
|
| 957 |
+
if isinstance(obj, pd.DataFrame):
|
| 958 |
+
return obj
|
| 959 |
+
if isinstance(obj, pd.Series):
|
| 960 |
+
return obj.to_frame(name=step_name or "value").reset_index()
|
| 961 |
+
return pd.DataFrame(obj)
|
| 962 |
+
|
| 963 |
+
|
| 964 |
+
def _dispatch_pipeline(df: pd.DataFrame, steps: List[Dict[str, Any]]) -> pd.DataFrame:
    """Apply a whitelisted sequence of pandas operations to *df*.

    Each step is a dict with an "op" key naming an operation from
    _ALLOWED_OPS; its remaining keys are forwarded as keyword arguments.

    Raises:
        ValueError: On malformed steps or operations outside the whitelist.
    """
    current = df
    for index, step in enumerate(steps):
        if not isinstance(step, dict):
            raise ValueError(f"Pipeline step {index} must be an object")
        op = step.get("op")
        # Reject non-string ops and dunder names outright.
        if not isinstance(op, str) or op.startswith("__"):
            raise ValueError(f"Invalid op at step {index}")
        kind = _ALLOWED_OPS.get(op)
        if kind is None:
            raise ValueError(f"Op '{op}' not allowed")
        if kind == "df_method":
            method = getattr(current, op, None)
            if method is None or not callable(method):
                raise ValueError(f"Method '{op}' not available on DataFrame")
            kwargs = {key: value for key, value in step.items() if key != "op"}
            result = method(**kwargs) if kwargs else method()
            current = _coerce_tabular(result, op)
        elif op == "groupby":
            grouped = current.groupby(by=step.get("by"), dropna=False, observed=False)
            if "agg" in step:
                current = _coerce_tabular(grouped.agg(step.get("agg")), op)
            elif step.get("size") is True:
                current = grouped.size().reset_index(name="size")
            else:
                raise ValueError("groupby requires 'agg' or size=true")
        else:
            raise ValueError(f"Unsupported special op: {op}")
    return current
|
| 996 |
+
|
| 997 |
+
|
| 998 |
+
def _apply_pandas_query(
    df: pd.DataFrame,
    query: Optional[str],
    preview_opts: Optional[Dict[str, Any]] = None,
    plot_opts: Optional[Dict[str, Any]] = None,
) -> Tuple[pd.DataFrame, Dict[str, Any]]:
    """Apply an optional query/pipeline to *df* and build a preview payload.

    The *query* string may be one of:
      - a JSON object: {"pipeline": [...]} or {"expr": "..."}, optionally
        carrying "plot" and "preview" overrides,
      - a JSON array of pipeline steps (see _dispatch_pipeline),
      - "expr:<pandas query expression>",
      - a bare ``DataFrame.query`` expression.

    Args:
        df: Source DataFrame (not mutated).
        query: Optional transformation spec as described above.
        preview_opts: Optional dict with "rows", "cols", "include_schema".
        plot_opts: Optional dict with "kind", "x", "y" for a matplotlib plot.

    Returns:
        Tuple of (transformed DataFrame, payload dict containing shapes,
        markdown preview, truncated records, and optionally schema,
        describe summary, and base64-encoded plots).

    Raises:
        ValueError: If the query cannot be parsed or applied.
    """
    preview = preview_opts or {"rows": 10, "cols": 20, "include_schema": True}
    plots: List[str] = []
    original_shape = tuple(df.shape)

    transformed = df
    if query and isinstance(query, str) and query.strip():
        q = query.strip()
        try:
            if q.startswith("{") and q.endswith("}"):
                cfg = json.loads(q)
                if isinstance(cfg.get("pipeline"), list):
                    transformed = _dispatch_pipeline(df, cfg["pipeline"])  # type: ignore[arg-type]
                elif isinstance(cfg.get("expr"), str):
                    transformed = df.query(cfg["expr"])  # type: ignore[arg-type]
                # JSON config may override plot/preview options.
                plot_opts = cfg.get("plot") or plot_opts
                preview = cfg.get("preview") or preview
            elif q.startswith("[") and q.endswith("]"):
                steps = json.loads(q)
                transformed = _dispatch_pipeline(df, steps)
            elif q.lower().startswith("expr:"):
                expr = q.split(":", 1)[1].strip()
                transformed = df.query(expr)
            else:
                transformed = df.query(q)
        except Exception as e:
            raise ValueError(f"Failed to apply query: {e}")

    if plot_opts and MATPLOTLIB_AVAILABLE and plt is not None:
        # Plotting is best-effort: any failure is swallowed so the tabular
        # result is still returned.
        try:
            kind = plot_opts.get("kind", "bar")
            x = plot_opts.get("x")
            y = plot_opts.get("y")
            fig = plt.figure()
            ax = fig.gca()
            data = transformed
            if x is None and y is None and kind in ("bar", "barh"):
                # Without explicit axes, plot value counts of the first
                # non-numeric (or simply the first) column.
                non_numeric = [c for c in data.columns if not pd.api.types.is_numeric_dtype(data[c])]
                target_col = non_numeric[0] if non_numeric else data.columns[0]
                vc = data[target_col].value_counts().head(20)
                vc.plot(kind=kind, ax=ax)
            else:
                data.plot(kind=kind, x=x, y=y, ax=ax)
            plot_path = os.path.join(tempfile.gettempdir(), f"df_plot_{uuid.uuid4().hex}.png")
            fig.savefig(plot_path, bbox_inches="tight")
            plt.close(fig)
            plots.append(encode_image(plot_path))
        except Exception:
            pass

    rows = int(preview.get("rows", 10))
    cols = int(preview.get("cols", 20))
    include_schema = bool(preview.get("include_schema", True))

    table_markdown = _safe_to_markdown(transformed, rows, cols)
    table_records = _truncate_records(transformed, max_rows=min(rows, 1000), max_cols=min(cols, 100))
    payload: Dict[str, Any] = {
        "original_shape": original_shape,
        "shape": tuple(transformed.shape),
        "table_markdown": table_markdown,
        "table_records": table_records,
    }
    if include_schema:
        payload["schema"] = _dataframe_schema(transformed)
    try:
        # Summary only for modest-sized results to keep the payload small.
        if transformed.shape[0] <= 5000 and transformed.shape[1] <= 50:
            try:
                summary = transformed.describe(include="all", datetime_is_numeric=True)
            except TypeError:
                # pandas >= 2.0 removed the datetime_is_numeric keyword;
                # without this fallback the summary was silently dropped.
                summary = transformed.describe(include="all")
            payload["describe_summary"] = str(summary)
    except Exception:
        pass
    if plots:
        payload["plots"] = plots
    return transformed, payload
|
| 1075 |
+
|
| 1076 |
+
|
| 1077 |
@tool
|
| 1078 |
def analyze_csv_file(file_reference: str, query: str, agent=None) -> str:
|
| 1079 |
"""
|
|
|
|
| 1106 |
return FileUtils.create_tool_response("analyze_csv_file", error=file_info.error)
|
| 1107 |
try:
|
| 1108 |
df = pd.read_csv(file_path)
|
| 1109 |
+
_, payload = _apply_pandas_query(
|
| 1110 |
+
df,
|
| 1111 |
+
query=query if isinstance(query, str) and query.strip() else None,
|
| 1112 |
+
preview_opts=None,
|
| 1113 |
+
plot_opts=None,
|
| 1114 |
+
)
|
| 1115 |
+
header = (
|
| 1116 |
+
f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
|
| 1117 |
+
f"File: {file_info.name} ({FileUtils.format_file_size(file_info.size)})\n"
|
| 1118 |
+
)
|
| 1119 |
+
result_parts = [header]
|
| 1120 |
+
if payload.get("table_markdown"):
|
| 1121 |
+
result_parts.append("Preview:\n" + payload["table_markdown"])
|
| 1122 |
+
if payload.get("describe_summary"):
|
| 1123 |
+
result_parts.append("\n\nSummary statistics:\n" + str(payload["describe_summary"]))
|
| 1124 |
+
result_text = "\n".join(result_parts)
|
| 1125 |
+
return FileUtils.create_tool_response(
|
| 1126 |
+
"analyze_csv_file",
|
| 1127 |
+
result=result_text,
|
| 1128 |
+
file_info=file_info,
|
| 1129 |
+
extra=payload,
|
| 1130 |
+
)
|
| 1131 |
except Exception as e:
|
| 1132 |
return FileUtils.create_tool_response("analyze_csv_file", error=f"Error analyzing CSV file: {str(e)}")
|
| 1133 |
|
|
|
|
| 1163 |
return FileUtils.create_tool_response("analyze_excel_file", error=file_info.error)
|
| 1164 |
try:
|
| 1165 |
df = pd.read_excel(file_path)
|
| 1166 |
+
_, payload = _apply_pandas_query(
|
| 1167 |
+
df,
|
| 1168 |
+
query=query if isinstance(query, str) and query.strip() else None,
|
| 1169 |
+
preview_opts=None,
|
| 1170 |
+
plot_opts=None,
|
| 1171 |
+
)
|
| 1172 |
+
header = (
|
| 1173 |
+
f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
|
| 1174 |
+
f"File: {file_info.name} ({FileUtils.format_file_size(file_info.size)})\n"
|
| 1175 |
+
)
|
| 1176 |
+
result_parts = [header]
|
| 1177 |
+
if payload.get("table_markdown"):
|
| 1178 |
+
result_parts.append("Preview:\n" + payload["table_markdown"])
|
| 1179 |
+
if payload.get("describe_summary"):
|
| 1180 |
+
result_parts.append("\n\nSummary statistics:\n" + str(payload["describe_summary"]))
|
| 1181 |
+
result_text = "\n".join(result_parts)
|
| 1182 |
+
return FileUtils.create_tool_response(
|
| 1183 |
+
"analyze_excel_file",
|
| 1184 |
+
result=result_text,
|
| 1185 |
+
file_info=file_info,
|
| 1186 |
+
extra=payload,
|
| 1187 |
+
)
|
| 1188 |
except Exception as e:
|
| 1189 |
# Enhanced error reporting: print columns and head if possible
|
| 1190 |
try:
|
|
|
|
| 1246 |
"color_analysis": color_analysis,
|
| 1247 |
"thumbnail": thumbnail_base64,
|
| 1248 |
}
|
| 1249 |
+
return FileUtils.create_tool_response("analyze_image", result=json.dumps(result))
|
| 1250 |
except Exception as e:
|
| 1251 |
return FileUtils.create_tool_response("analyze_image", error=str(e))
|
| 1252 |
|
|
|
|
| 1413 |
}, indent=2)
|
| 1414 |
|
| 1415 |
class GenerateSimpleImageParams(BaseModel):
    """Argument schema for the generate_simple_image tool."""
    # Required: which pattern to generate.
    image_type: str = Field(..., description="Type of image to generate: 'solid', 'gradient', 'checkerboard', 'noise'")
    width: int = Field(500, description="Width of the generated image")
    height: int = Field(500, description="Height of the generated image")
    # Option for image_type == "solid".
    color: Optional[str] = Field(None, description="Solid color for 'solid' type (e.g., 'red', 'blue') or RGB string (e.g., '255,0,0')")
    # Options for image_type == "gradient".
    start_color: Optional[List[int]] = Field(None, description="Gradient start color [r, g, b]")
    end_color: Optional[List[int]] = Field(None, description="Gradient end color [r, g, b]")
    direction: Optional[Literal["horizontal", "vertical"]] = Field(None, description="Gradient direction ('horizontal' or 'vertical')")
    # Options for image_type == "checkerboard".
    square_size: Optional[int] = Field(None, description="Square size for checkerboard")
    color1: Optional[str] = Field(None, description="First color for checkerboard")
    color2: Optional[str] = Field(None, description="Second color for checkerboard")
|
| 1426 |
|
| 1427 |
@tool(args_schema=GenerateSimpleImageParams)
|
| 1428 |
def generate_simple_image(image_type: str, width: int = 500, height: int = 500,
|
| 1429 |
+
color: Optional[str] = None, start_color: Optional[List[int]] = None,
|
| 1430 |
+
end_color: Optional[List[int]] = None, direction: Optional[str] = None,
|
| 1431 |
+
square_size: Optional[int] = None, color1: Optional[str] = None,
|
| 1432 |
+
color2: Optional[str] = None) -> str:
|
| 1433 |
"""
|
| 1434 |
Generate simple images like gradients, solid colors, checkerboard, or noise patterns.
|
| 1435 |
|
|
|
|
| 1443 |
str: JSON string with the generated image as base64 or error message.
|
| 1444 |
"""
|
| 1445 |
try:
|
|
|
|
| 1446 |
if image_type == "solid":
|
| 1447 |
+
color_str = color or "255,255,255"
|
| 1448 |
# Parse color string to RGB tuple
|
| 1449 |
if "," in color_str and color_str.replace(",", "").replace(" ", "").isdigit():
|
| 1450 |
try:
|
|
|
|
| 1463 |
color = (255, 255, 255)
|
| 1464 |
img = Image.new("RGB", (width, height), color)
|
| 1465 |
elif image_type == "gradient":
|
| 1466 |
+
start_color = start_color or [255, 0, 0]
|
| 1467 |
+
end_color = end_color or [0, 0, 255]
|
| 1468 |
+
direction = direction or "horizontal"
|
| 1469 |
img = Image.new("RGB", (width, height))
|
| 1470 |
draw = ImageDraw.Draw(img)
|
| 1471 |
if direction == "horizontal":
|
|
|
|
| 1484 |
noise_array = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)
|
| 1485 |
img = Image.fromarray(noise_array, "RGB")
|
| 1486 |
elif image_type == "checkerboard":
|
| 1487 |
+
square_size = square_size or 50
|
| 1488 |
+
color1 = color1 or "white"
|
| 1489 |
+
color2 = color2 or "black"
|
| 1490 |
img = Image.new("RGB", (width, height))
|
| 1491 |
for y in range(0, height, square_size):
|
| 1492 |
for x in range(0, width, square_size):
|