msIntui commited on
Commit
910e0d4
Β·
0 Parent(s):

feat: initial clean deployment

Browse files
.gitignore ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Directories to ignore
2
+ archive/
3
+ debug/
4
+ samples/
5
+ chat/
6
+ # Models - allow specific model files
7
+ models/*
8
+ !models/yolo/
9
+ !models/deeplsd/
10
+ !models/doctr/
11
+ !models/*.pt
12
+ !models/*.tar
13
+ results/
14
+ logs/
15
+ DeepLSD/
16
+
17
+ # Large files
18
+ *.tar
19
+ *.pt
20
+ *.pth
21
+ *.onnx
22
+ *.weights
23
+
24
+ # Python
25
+ __pycache__/
26
+ *.py[cod]
27
+ *$py.class
28
+ *.so
29
+ .Python
30
+ env/
31
+ build/
32
+ develop-eggs/
33
+ dist/
34
+ downloads/
35
+ eggs/
36
+ .eggs/
37
+ lib/
38
+ lib64/
39
+ parts/
40
+ sdist/
41
+ var/
42
+ *.egg-info/
43
+ .installed.cfg
44
+ *.egg
45
+
46
+ # Virtual Environment
47
+ .venv
48
+ venv/
49
+ ENV/
50
+
51
+ # IDE
52
+ .idea/
53
+ .vscode/
54
+ *.swp
55
+ *.swo
56
+
57
+ # Project specific
58
+ !results/
59
+ !results/*.json
60
+ debug/
61
+ *.log
62
+ *.gz
63
+ models/
64
+ archive/
65
+ weights/
66
+
67
+ # Environment variables
68
+ .env
69
+ .env.*
70
+
71
+ # Explicitly track assets
72
+ !assets/
73
+ !assets/*.png
74
+ !assets/*.css
README.md ADDED
@@ -0,0 +1,494 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Intelligent_PID
3
+ emoji: πŸ”
4
+ colorFrom: red
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.50.2
8
+ app_file: gradioChatApp.py
9
+ pinned: false
10
+ ---
11
+
12
+ # P&ID Processing with AI-Powered Graph Construction
13
+
14
+ ## Overview
15
+ This project processes P&ID (Piping and Instrumentation Diagram) images using multiple AI models for symbol detection, text recognition, and line detection. It constructs a graph representation of the diagram and provides an interactive interface for querying the diagram's contents.
16
+
17
+ ## Features
18
+ - P&ID Document Processing
19
+ - Symbol Detection
20
+ - Text Recognition
21
+ - Line Detection
22
+ - Knowledge Graph Generation
23
+ - Interactive Chat Interface
24
+
25
+ ## Usage
26
+ 1. Upload a P&ID document
27
+ 2. Click "Process Document"
28
+ 3. View results in different tabs
29
+ 4. Ask questions about the P&ID in the chat
30
+
31
+ ## Process Flow
32
+
33
+ ```mermaid
34
+ graph TD
35
+ subgraph "Document Input"
36
+ A[Upload Document] --> B[Validate File]
37
+ B -->|PDF/Image| C[Document Processor]
38
+ B -->|Invalid| ERR[Error Message]
39
+ C -->|PDF| D1[Extract Pages]
40
+ C -->|Image| D2[Direct Process]
41
+ end
42
+
43
+ subgraph "Image Preprocessing"
44
+ D1 --> E[Optimize Image]
45
+ D2 --> E
46
+ E -->|CLAHE Enhancement| E1[Contrast Enhancement]
47
+ E1 -->|Denoising| E2[Clean Image]
48
+ E2 -->|Binarization| E3[Binary Image]
49
+ E3 -->|Resize| E4[Normalized Image]
50
+ end
51
+
52
+ subgraph "Line Detection Pipeline"
53
+ E4 --> L1[Load DeepLSD Model]
54
+ L1 --> L2[Scale Image 0.1x]
55
+ L2 --> L3[Grayscale Conversion]
56
+ L3 --> L4[Model Inference]
57
+ L4 --> L5[Scale Coordinates]
58
+ L5 --> L6[Draw Lines]
59
+ end
60
+
61
+ subgraph "Detection Pipeline"
62
+ E4 --> F[Symbol Detection]
63
+ E4 --> G[Text Detection]
64
+
65
+ F --> S1[Load YOLO Models]
66
+ G --> T1[Load OCR Models]
67
+
68
+ S1 --> S2[Detect Symbols]
69
+ T1 --> T2[Detect Text]
70
+
71
+ S2 --> S3[Process Symbols]
72
+ T2 --> T3[Process Text]
73
+
74
+ L6 --> L7[Process Lines]
75
+ end
76
+
77
+ subgraph "Data Integration"
78
+ S3 --> I[Data Aggregation]
79
+ T3 --> I
80
+ L7 --> I
81
+ I --> J[Create Edges]
82
+ J --> K[Build Graph Network]
83
+ K --> L[Generate Knowledge Graph]
84
+ end
85
+
86
+ subgraph "User Interface"
87
+ L --> M[Interactive Visualization]
88
+ M --> N[Chat Interface]
89
+ N --> O[Query Processing]
90
+ O --> P[Response Generation]
91
+ P --> N
92
+ end
93
+
94
+ style A fill:#f9f,stroke:#333,stroke-width:2px
95
+ style F fill:#fbb,stroke:#333,stroke-width:2px
96
+ style G fill:#bfb,stroke:#333,stroke-width:2px
97
+ %% style H fill:#bbf,stroke:#333,stroke-width:2px  (disabled: no node H is defined in this diagram)
98
+ style I fill:#fbf,stroke:#333,stroke-width:2px
99
+ style N fill:#bbf,stroke:#333,stroke-width:2px
100
+
101
+ %% Add style for model nodes -- disabled: nodes SM1/SM2/LM1/DC1/DC2 are not
+ %% defined anywhere in this diagram, so styling them breaks rendering.
+ %% style SM1 fill:#ffe6e6,stroke:#333,stroke-width:2px
+ %% style SM2 fill:#ffe6e6,stroke:#333,stroke-width:2px
+ %% style LM1 fill:#e6e6ff,stroke:#333,stroke-width:2px
+ %% style DC1 fill:#e6ffe6,stroke:#333,stroke-width:2px
+ %% style DC2 fill:#e6ffe6,stroke:#333,stroke-width:2px
107
+ ```
108
+
109
+ ## Architecture
110
+
111
+ ![Project Architecture](./assets/P&ID_to_Graph.drawio.png)
112
+
113
+ ## Features
114
+
115
+ - **Multi-modal AI Processing**:
116
+ - Combined OCR approach using Tesseract, EasyOCR, and DocTR
117
+ - Symbol detection with optimized thresholds
118
+ - Intelligent line and connection detection
119
+ - **Document Processing**:
120
+ - Support for PDF, PNG, JPG, JPEG formats
121
+ - Automatic page extraction from PDFs
122
+ - Image optimization pipeline
123
+ - **Text Detection Types**:
124
+ - Equipment Tags
125
+ - Line Numbers
126
+ - Instrument Tags
127
+ - Valve Numbers
128
+ - Pipe Sizes
129
+ - Flow Directions
130
+ - Service Descriptions
131
+ - Process Instruments
132
+ - Nozzles
133
+ - Pipe Connectors
134
+ - **Data Integration**:
135
+ - Automatic edge detection
136
+ - Relationship mapping
137
+ - Confidence scoring
138
+ - Detailed detection statistics
139
+ - **User Interface**:
140
+ - Interactive visualization tabs
141
+ - Real-time processing feedback
142
+ - AI-powered chat interface
143
+ - Knowledge graph exploration
144
+
145
+ The entire process is visualized through an interactive Gradio-based UI, allowing users to upload a P&ID image, follow the detection steps, and view both the results and insights in real time.
146
+
147
+ ## Key Files
148
+
149
+ - **gradioChatApp.py**: The main Gradio app script that handles the frontend and orchestrates the overall flow.
150
+ - **symbol_detection.py**: Module for detecting symbols using YOLO models.
151
+ - **text_detection_combined.py**: Unified module for text detection using multiple OCR engines (Tesseract, EasyOCR, DocTR).
152
+ - **line_detection_ai.py**: Module for detecting lines and connections using AI.
153
+ - **data_aggregation.py**: Aggregates detected elements into a structured format.
154
+ - **graph_construction.py**: Constructs the graph network from aggregated data.
155
+ - **graph_processor.py**: Handles graph visualization and processing.
156
+ - **pdf_processor.py**: Handles PDF document processing and page extraction.
157
+
158
+ ## Setup and Installation
159
+
160
+ 1. Clone the repository:
161
+ ```bash
162
+ git clone https://github.com/IntuigenceAI/intui-PnID-POC.git
163
+ cd intui-PnID-POC
164
+ ```
165
+
166
+ 2. Install dependencies using uv:
167
+ ```bash
168
+ # Install uv if you haven't already
169
+ curl -LsSf https://astral.sh/uv/install.sh | sh
170
+
171
+ # Create and activate virtual environment
172
+ uv venv
173
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
174
+
175
+ # Install dependencies
176
+ uv pip install -r requirements.txt
177
+ ```
178
+
179
+ 3. Download required models:
180
+ ```bash
181
+ python download_model.py # Downloads DeepLSD model for line detection
182
+ ```
183
+
184
+ 4. Run the application:
185
+ ```bash
186
+ python gradioChatApp.py
187
+ ```
188
+
189
+ ## Models
190
+
191
+ ### Line Detection Model
192
+ - **DeepLSD Model**:
193
+ - File: deeplsd_md.tar
194
+ - Purpose: Line segment detection in P&ID diagrams
195
+ - Input Resolution: Variable (scaled to 0.1x for performance)
196
+ - Processing: Grayscale conversion and binary thresholding
197
+
198
+ ### Text Detection Models
199
+ - **Combined OCR Approach**:
200
+ - Tesseract OCR
201
+ - EasyOCR
202
+ - DocTR
203
+ - Purpose: Text recognition and classification
204
+
205
+ ### Graph Processing
206
+ - **NetworkX-based**:
207
+ - Purpose: Graph construction and analysis
208
+ - Features: Node linking, edge creation, path analysis
209
+
210
+ ## Updating the Environment
211
+
212
+ To update the environment, use the following:
213
+
214
+ ```bash
215
+ conda env update --file environment.yml --prune
216
+ ```
217
+
218
+ This command will update the environment according to changes made in the `environment.yml`.
219
+
220
+ ### Step 6: Deactivate the environment
221
+
222
+ When you're done, deactivate the environment by:
223
+
224
+ ```bash
225
+ conda deactivate
226
+ ```
227
+
228
+ 2. Upload a P&ID image through the interface.
229
+ 3. Follow the sequential steps of symbol, text, and line detection.
230
+ 4. View the generated graph and AI agent's reasoning in the real-time chat box.
231
+ 5. Save and export the results if satisfactory.
232
+
233
+ ## Folder Structure
234
+
235
+ ```
236
+ β”œβ”€β”€ assets/
237
+ β”‚ └── AiAgent.png
238
+ β”‚ └── llm.png
239
+ β”œβ”€β”€ gradioApp.py
240
+ β”œβ”€β”€ symbol_detection.py
241
+ β”œβ”€β”€ text_detection_combined.py
242
+ β”œβ”€β”€ line_detection_ai.py
243
+ β”œβ”€β”€ data_aggregation.py
244
+ β”œβ”€β”€ graph_construction.py
245
+ β”œβ”€β”€ graph_processor.py
246
+ β”œβ”€β”€ pdf_processor.py
247
+ β”œβ”€β”€ pnid_agent.py
248
+ β”œβ”€β”€ requirements.txt
249
+ β”œβ”€β”€ results/
250
+ β”œβ”€β”€ models/
251
+ β”‚ └── symbol_detection_model.pth
252
+ ```
253
+
254
+ ## /models Folder
255
+
256
+ - **models/symbol_detection_model.pth**: This folder contains the pre-trained model for symbol detection in P&ID diagrams. This model is crucial for detecting key symbols such as valves, instruments, and pipes in the diagram. Make sure to download the model and place it in the `/models` directory before running the app.
257
+
258
+ ## Future Work
259
+
260
+ - **Advanced Symbol Recognition**: Improve symbol detection by integrating more sophisticated recognition models.
261
+ - **Graph Enhancement**: Introduce more complex graph structures and logic for representing the relationships between the diagram's elements.
262
+ - **Data Export**: Allow export in additional formats such as DEXPI-compliant XML or JSON.
263
+
264
+
265
+ # Docker Information
266
+
267
+ We'll cover the basic docker operations here.
268
+
269
+ ## Building
270
+
271
+ There is a dockerfile for each different project (they have slightly different requirements).
272
+
273
+ ### `gradioChatApp.py`
274
+
275
+ Run this one as follows:
276
+
277
+ ```
278
+ > docker build -t exp-pnid-to-graph_chat-w-graph:0.0.4 -f Dockerfile-chatApp .
279
+ > docker tag exp-pnid-to-graph_chat-w-graph:0.0.4 intaicr.azurecr.io/intai/exp-pnid-to-graph_chat-w-graph:0.0.4
280
+ ```
281
+
282
+ ## Deploying to ACR
283
+
284
+ ### `gradioChatApp.py`
285
+
286
+ ```
287
+ > az login
288
+ > az acr login --name intaicr
289
+ > docker push intaicr.azurecr.io/intai/exp-pnid-to-graph_chat-w-graph:0.0.4
290
+ ```
291
+
292
+ ## Models
293
+
294
+ ### Symbol Detection Models
295
+ - **Intui_SDM_41.pt**: Primary model for equipment and large symbol detection
296
+ - Classes: Equipment, Vessels, Heat Exchangers
297
+ - Input Resolution: 1280x1280
298
+ - Confidence Threshold: 0.3-0.7 (adaptive)
299
+
300
+ - **Intui_SDM_20.pt**: Secondary model for instrument and small symbol detection
301
+ - Classes: Instruments, Valves, Indicators
302
+ - Input Resolution: 1280x1280
303
+ - Confidence Threshold: 0.3-0.7 (adaptive)
304
+
305
+ ### Line Detection Model
306
+ - **intui_LDM_01.pt**: Specialized model for line and connection detection
307
+ - Classes: Solid Lines, Dashed Lines
308
+ - Input Resolution: 1280x1280
309
+ - Confidence Threshold: 0.5
310
+
311
+ ### Text Detection Models
312
+ - **Tesseract**: v5.3.0
313
+ - Configuration:
314
+ - OEM Mode: 3 (Default)
315
+ - PSM Mode: 11 (Sparse text)
316
+ - Custom Whitelist: A-Z, 0-9, special characters
317
+
318
+ - **EasyOCR**: v1.7.1
319
+ - Configuration:
320
+ - Language: English
321
+ - Paragraph Mode: False
322
+ - Height Threshold: 2.0
323
+ - Width Threshold: 2.0
324
+ - Contrast Threshold: 0.2
325
+
326
+ - **DocTR**: v0.6.0
327
+ - Models:
328
+ - fast_base-688a8b34.pt
329
+ - crnn_vgg16_bn-9762b0b0.pt
330
+
331
+ # P&ID Line Detection
332
+
333
+ A deep learning-based pipeline for detecting lines in P&ID diagrams using DeepLSD.
334
+
335
+ ## Architecture
336
+ ```mermaid
337
+ graph TD
338
+ A[Input Image] --> B[Line Detection]
339
+ B --> C[DeepLSD Model]
340
+ C --> D[Post-processing]
341
+ D --> E[Output JSON/Image]
342
+
343
+ subgraph Line Detection Pipeline
344
+ B --> F[Image Preprocessing]
345
+ F --> G[Scale Image 0.1x]
346
+ G --> H[Grayscale Conversion]
347
+ H --> C
348
+ C --> I[Scale Coordinates]
349
+ I --> J[Draw Lines]
350
+ J --> E
351
+ end
352
+ ```
353
+
354
+ ## Setup
355
+
356
+ ### Prerequisites
357
+ - Python 3.12+
358
+ - uv (for dependency management)
359
+ - Git
360
+ - CUDA-capable GPU (optional)
361
+
362
+ ### Installation
363
+
364
+ 1. Clone the repository:
365
+ ```bash
366
+ git clone https://github.com/IntuigenceAI/intui-PnID-POC.git
367
+ cd intui-PnID-POC
368
+ ```
369
+
370
+ 2. Install dependencies using uv:
371
+ ```bash
372
+ # Install uv if you haven't already
373
+ curl -LsSf https://astral.sh/uv/install.sh | sh
374
+
375
+ # Create and activate virtual environment
376
+ uv venv
377
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
378
+
379
+ # Install dependencies
380
+ uv pip install -r requirements.txt
381
+ ```
382
+
383
+ 3. Download DeepLSD model:
384
+ ```bash
385
+ python download_model.py
386
+ ```
387
+
388
+ ## Usage
389
+
390
+ 1. Run the line detection:
391
+ ```bash
392
+ python line_detection_ai.py
393
+ ```
394
+
395
+ The script will:
396
+ - Load the DeepLSD model
397
+ - Process input images at 0.1x scale for performance
398
+ - Generate line detections
399
+ - Save results as JSON and annotated images
400
+
401
+ ## Configuration
402
+
403
+ Key parameters in `line_detection_ai.py`:
404
+ - `scale_factor`: Image scaling (default: 0.1)
405
+ - `device`: CPU/GPU selection
406
+ - `mask_json_paths`: Paths to text/symbol detection results
407
+
408
+ ## Input/Output
409
+
410
+ ### Input
411
+ - Original P&ID images
412
+ - Optional text/symbol detection JSON files for masking
413
+
414
+ ### Output
415
+ - Annotated images with detected lines
416
+ - JSON files containing line coordinates and metadata
417
+
418
+ ## Project Structure
419
+
420
+ ```
421
+ β”œβ”€β”€ line_detection_ai.py # Main line detection script
422
+ β”œβ”€β”€ detectors.py # Line detector implementation
423
+ β”œβ”€β”€ download_model.py # Model download utility
424
+ β”œβ”€β”€ models/ # Directory for model files
425
+ β”‚ └── deeplsd_md.tar # DeepLSD model weights
426
+ β”œβ”€β”€ results/ # Output directory
427
+ └── requirements.txt # Project dependencies
428
+ ```
429
+
430
+ ## Dependencies
431
+
432
+ Key dependencies:
433
+ - torch
434
+ - opencv-python
435
+ - numpy
436
+ - DeepLSD
437
+
438
+ See `requirements.txt` for the complete list.
439
+
440
+ ## Contributing
441
+
442
+ 1. Fork the repository
443
+ 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
444
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
445
+ 4. Push to the branch (`git push origin feature/amazing-feature`)
446
+ 5. Open a Pull Request
447
+
448
+ ## License
449
+
450
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
451
+
452
+ ## Acknowledgments
453
+
454
+ - [DeepLSD](https://github.com/cvg/DeepLSD) for the line detection model
455
+ - Original P&ID processing pipeline by IntuigenceAI
456
+ ---
457
+ title: PnID Diagram Analyzer
458
+ emoji: πŸ”
459
+ colorFrom: blue
460
+ colorTo: red
461
+ sdk: gradio
462
+ sdk_version: 4.19.2
463
+ app_file: gradioChatApp.py
464
+ pinned: false
465
+ ---
466
+
467
+ # PnID Diagram Analyzer
468
+
469
+ This app analyzes PnID diagrams using AI to detect and interpret various elements.
470
+
471
+ ## Features
472
+ - Line detection
473
+ - Symbol recognition
474
+ - Text detection
475
+ - Graph construction
476
+
477
+ # Intuigence P&ID Analyzer
478
+
479
+ Interactive P&ID analysis tool powered by AI.
480
+
481
+ ## Features
482
+ - P&ID Document Processing
483
+ - Symbol Detection
484
+ - Text Recognition
485
+ - Line Detection
486
+ - Knowledge Graph Generation
487
+ - Interactive Chat Interface
488
+
489
+ ## Usage
490
+ 1. Upload a P&ID document
491
+ 2. Click "Process Document"
492
+ 3. View results in different tabs
493
+ 4. Ask questions about the P&ID in the chat
494
+
assets/AiAgent.png ADDED
assets/intuigence.png ADDED
assets/user.png ADDED
base.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import List, Optional, Dict
5
+
6
+ import numpy as np
7
+ import cv2
8
+
9
+ from pathlib import Path
10
+ from loguru import logger
11
+ import json
12
+
13
+ from common import DetectionResult
14
+ from storage import StorageInterface
15
+ from utils import DebugHandler, CoordinateTransformer
16
+
17
+
18
class BaseConfig(ABC):
    """Abstract base for every configuration class in the project."""

    def __post_init__(self):
        """Hook for dataclass subclasses to normalize or validate defaults.

        Intentionally a no-op here; subclasses override as needed.
        """
24
+
25
class BaseDetector(ABC):
    """Abstract base class for detection models.

    Subclasses wrap a single model and implement loading, pre/post
    processing, and inference on numpy images.
    """

    def __init__(self,
                 config: BaseConfig,
                 debug_handler: DebugHandler = None):
        # A fresh DebugHandler is created when none (or a falsy one) is given.
        self.config = config
        self.debug_handler = debug_handler or DebugHandler()

    @abstractmethod
    def _load_model(self, model_path: str):
        """Load and return the detection model from *model_path*."""
        pass

    @abstractmethod
    def detect(self, image: np.ndarray, *args, **kwargs):
        """Run detection on an input image and return the detections."""
        pass

    @abstractmethod
    def _preprocess(self, image: np.ndarray) -> np.ndarray:
        """Preprocess the input image before detection."""
        pass

    @abstractmethod
    def _postprocess(self, image: np.ndarray) -> np.ndarray:
        """Postprocess the image after detection.

        NOTE(review): the original docstring said "before detection",
        an apparent copy-paste slip from _preprocess.
        """
        pass
53
+
54
+
55
class BaseDetectionPipeline(ABC):
    """Abstract base class for detection pipelines.

    Concrete subclasses implement process_image(); the helpers here
    provide shared ROI cropping, coordinate adjustment, and result
    persistence.
    """

    def __init__(
        self,
        storage: StorageInterface,
        debug_handler=None
    ):
        # self.detector = detector
        self.storage = storage
        self.debug_handler = debug_handler or DebugHandler()
        self.transformer = CoordinateTransformer()

    @abstractmethod
    def process_image(
        self,
        image_path: str,
        output_dir: str,
        config
    ) -> DetectionResult:
        """Main processing pipeline for a single image."""
        pass

    def _apply_roi(self, image: np.ndarray, roi: np.ndarray) -> np.ndarray:
        """Apply region-of-interest cropping.

        roi is unpacked as (x_min, y_min, x_max, y_max); the image is
        returned unchanged when roi is missing or not length 4.
        """
        if roi is not None and len(roi) == 4:
            x_min, y_min, x_max, y_max = roi
            return image[y_min:y_max, x_min:x_max]
        return image

    def _adjust_coordinates(self, detections: List[Dict], roi: np.ndarray) -> List[Dict]:
        """Translate detection bboxes from ROI-local back to full-image space.

        Detections with a missing or malformed "bbox" are skipped with a
        warning instead of aborting the whole batch.
        """
        if roi is None or len(roi) != 4:
            return detections

        x_offset, y_offset = roi[0], roi[1]
        adjusted = []

        for det in detections:
            try:
                adjusted_bbox = [
                    int(det["bbox"][0] + x_offset),
                    int(det["bbox"][1] + y_offset),
                    int(det["bbox"][2] + x_offset),
                    int(det["bbox"][3] + y_offset)
                ]
                adjusted_det = {**det, "bbox": adjusted_bbox}
                adjusted.append(adjusted_det)
            # Broadened from KeyError alone: a short or non-numeric bbox
            # raises IndexError/TypeError and previously crashed the pipeline.
            except (KeyError, IndexError, TypeError):
                logger.warning("Invalid detection format during coordinate adjustment")
        return adjusted

    def _persist_results(
        self,
        output_dir: str,
        image_path: str,
        detections: List[Dict],
        annotated_image: Optional[np.ndarray]
    ) -> Dict[str, str]:
        """Save detection results as JSON plus an optional annotated image.

        Returns:
            Dict with "json_path" (always set) and "image_path" (None when
            no annotated image was written).
        """
        self.storage.create_directory(output_dir)
        base_name = Path(image_path).stem

        # Save JSON results
        json_path = Path(output_dir) / f"{base_name}_lines.json"
        self.storage.save_file(
            str(json_path),
            json.dumps({
                "solid_lines": {"lines": detections},
                "dashed_lines": {"lines": []}
            }, indent=2).encode('utf-8')
        )

        # Save annotated image
        img_path = None
        if annotated_image is not None:
            # cv2.imencode returns (success, buffer); the success flag was
            # previously ignored, which could persist a garbage buffer.
            ok, img_data = cv2.imencode('.jpg', annotated_image)
            if ok:
                img_path = Path(output_dir) / f"{base_name}_annotated.jpg"
                self.storage.save_file(str(img_path), img_data.tobytes())
            else:
                logger.warning("Failed to encode annotated image; skipping save")

        return {
            "json_path": str(json_path),
            "image_path": str(img_path) if img_path else None
        }
base_config.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+
3
@dataclass
class BaseConfig:
    """Base configuration class.

    Accepts arbitrary keyword arguments and stores each one as an
    instance attribute, so callers can configure objects ad hoc.
    """
    def __init__(self, **kwargs):
        for name in kwargs:
            setattr(self, name, kwargs[name])
chatbot_agent.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # chatbot_agent.py
2
+
3
+ import os
4
+ import json
5
+ import re
6
+ from openai import OpenAI
7
+ import traceback
8
+ import logging
9
+ from dotenv import load_dotenv
10
+
11
+ # Load environment variables
12
+ load_dotenv()
13
+
14
+ # Get logger
15
+ logger = logging.getLogger(__name__)
16
+
17
def get_openai_client():
    """Return an OpenAI client configured from the OPENAI_API_KEY env var.

    Raises:
        ValueError: if the API key is not present in the environment.
    """
    key = os.getenv("OPENAI_API_KEY")
    if key:
        return OpenAI(api_key=key)
    raise ValueError("OpenAI API key not found in environment variables")
23
+
24
def format_message(role, content):
    """Build one chat-history entry in the OpenAI message format."""
    message = dict(role=role, content=content)
    return message
27
+
28
def initialize_graph_prompt(graph_data):
    """Build the system prompt describing the knowledge-graph contents.

    Summarizes element counts and lists per-symbol attributes so the LLM
    can answer questions about the P&ID. Falls back to a generic prompt
    if the graph data cannot be read.
    """
    try:
        summary = graph_data.get('summary', {})

        # Count fields are optional; include only those present, in this
        # fixed order.
        count_labels = [
            ('symbol_count', 'Symbols'),
            ('text_count', 'Texts'),
            ('line_count', 'Lines'),
            ('edge_count', 'Edges'),
        ]
        counts = [f"{label}: {summary[key]}"
                  for key, label in count_labels if key in summary]
        summary_info = ", ".join(counts) + "."

        # Describe each symbol using whichever attributes it carries.
        attr_labels = [
            ('symbol_id', 'ID'),
            ('class_id', 'Class'),
            ('category', 'Category'),
            ('type', 'Type'),
            ('label', 'Label'),
        ]
        node_details = ""
        detailed_results = graph_data.get('detailed_results', {})
        if 'symbols' in detailed_results:
            pieces = ["Nodes (symbols) in the graph include:\n"]
            for symbol in detailed_results['symbols']:
                attrs = [f"{label}: {symbol[key]}"
                         for key, label in attr_labels if key in symbol]
                if attrs:  # skip symbols with none of the known attributes
                    pieces.append(", ".join(attrs) + "\n")
            node_details = "".join(pieces)

        return (
            "You have access to a knowledge graph generated from a P&ID diagram. "
            f"The summary information includes:\n{summary_info}\n\n"
            f"{node_details}\n"
            "Answer questions about the P&ID elements using this information."
        )

    except Exception as e:
        logger.error(f"Error creating initial prompt: {str(e)}")
        return ("I have access to a P&ID diagram knowledge graph. "
                "I can help answer questions about the diagram elements.")
80
+
81
def get_assistant_response(user_message, json_path):
    """Generate response based on P&ID data and OpenAI.

    Answers simple count-style questions (valves, pumps, equipment)
    directly from the aggregated detection JSON; any other question is
    delegated to the OpenAI chat API with a system prompt built from the
    same data via initialize_graph_prompt().

    Args:
        user_message: The user's question, free text.
        json_path: Path to the aggregated detections JSON file.

    Returns:
        A response string; a generic apology message on any error.
    """
    try:
        client = get_openai_client()
        # Load the aggregated data
        with open(json_path, 'r') as f:
            data = json.load(f)

        # Process the user's question (case-insensitive keyword matching)
        question = user_message.lower()

        # Use rule-based responses for specific questions.
        # NOTE(review): the "or" alternatives are redundant -- "valves"
        # already contains "valve" -- but harmless.
        if "valve" in question or "valves" in question:
            valve_count = sum(1 for symbol in data.get('symbols', [])
                              if 'class' in symbol and 'valve' in symbol['class'].lower())
            return f"I found {valve_count} valves in this P&ID."

        elif "pump" in question or "pumps" in question:
            pump_count = sum(1 for symbol in data.get('symbols', [])
                             if 'class' in symbol and 'pump' in symbol['class'].lower())
            return f"I found {pump_count} pumps in this P&ID."

        elif "equipment" in question or "components" in question:
            # Tally detected symbols per class name.
            equipment_types = {}
            for symbol in data.get('symbols', []):
                if 'class' in symbol:
                    eq_type = symbol['class']
                    equipment_types[eq_type] = equipment_types.get(eq_type, 0) + 1

            response = "Here's a summary of the equipment I found:\n"
            for eq_type, count in equipment_types.items():
                response += f"- {eq_type}: {count}\n"
            return response

        # For other questions, use OpenAI
        else:
            # Prepare the conversation context: counts plus the full raw
            # detection data for the prompt builder.
            graph_data = {
                "summary": {
                    "symbol_count": len(data.get('symbols', [])),
                    "text_count": len(data.get('texts', [])),
                    "line_count": len(data.get('lines', [])),
                    "edge_count": len(data.get('edges', [])),
                },
                "detailed_results": data
            }

            initial_prompt = initialize_graph_prompt(graph_data)
            conversation = [
                {"role": "system", "content": initial_prompt},
                {"role": "user", "content": user_message}
            ]

            response = client.chat.completions.create(
                model="gpt-4-turbo",
                messages=conversation
            )
            # Return only the assistant text from the first choice.
            return response.choices[0].message.content

    except Exception as e:
        # Broad catch is deliberate: chat must degrade gracefully rather
        # than crash the UI; details go to the log.
        logger.error(f"Error in get_assistant_response: {str(e)}")
        logger.error(traceback.format_exc())
        return "I apologize, but I encountered an error analyzing the P&ID data. Please try asking a different question."
144
+
145
# Testing and Usage block
if __name__ == "__main__":
    # Load the knowledge graph data from JSON file
    json_file_path = "results/0_aggregated_detections.json"
    try:
        with open(json_file_path, 'r') as file:
            graph_data = json.load(file)
    except FileNotFoundError:
        print(f"Error: File not found at {json_file_path}")
        graph_data = None
    except json.JSONDecodeError:
        print("Error: Failed to decode JSON. Please check the file format.")
        graph_data = None

    # Initialize conversation history with assistant's welcome message
    history = [format_message("assistant", "Hello! I am ready to answer your questions about the P&ID knowledge graph. The graph includes nodes (symbols), edges, linkers, and text tags, and I have detailed information available about each. Please ask any questions related to these elements and their connections.")]

    # Print the assistant's welcome message
    print("Assistant:", history[0]["content"])

    # Individual Testing Options
    if graph_data:
        # Option 1: Test the graph prompt initialization
        print("\n--- Test: Graph Prompt Initialization ---")
        initial_prompt = initialize_graph_prompt(graph_data)
        print(initial_prompt)

        # Option 2: Simulate a conversation with a test question
        print("\n--- Test: Simulate Conversation ---")
        test_question = "Can you tell me about the connections between the nodes?"
        history.append(format_message("user", test_question))

        print(f"\nUser: {test_question}")
        # BUG FIX: get_assistant_response returns a single string, not a
        # generator; the old `for response in get_assistant_response(...)`
        # iterated the string and printed one CHARACTER per line.
        response = get_assistant_response(test_question, json_file_path)
        print("Assistant:", response)
        history.append(format_message("assistant", response))

        # Option 3: Manually input questions for interactive testing
        while True:
            user_question = input("\nYou: ")
            if user_question.lower() in ["exit", "quit"]:
                print("Exiting chat. Goodbye!")
                break

            history.append(format_message("user", user_question))
            response = get_assistant_response(user_question, json_file_path)
            print("Assistant:", response)
            history.append(format_message("assistant", response))
    else:
        print("Unable to load graph data. Please check the file path and format.")
common.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from typing import List, Dict, Optional, Tuple, Union
3
+ import numpy as np
4
+
5
+ from detection_schema import Line
6
+
7
@dataclass
class DetectionResult:
    """Outcome of a single detection pipeline run."""
    # Whether the pipeline completed without a fatal error.
    success: bool
    # Human-readable error description when success is False.
    error: Optional[str] = None
    # Input image with detections drawn on it, if one was produced.
    annotated_image: Optional[np.ndarray] = None
    # Wall-clock duration of the run -- presumably seconds; TODO confirm.
    processing_time: float = 0.0
    # Path to the persisted JSON detections file, once written.
    json_path: Optional[str] = None
    # Path to the persisted annotated image, once written.
    image_path: Optional[str] = None
config.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass, field
2
+ from typing import List, Dict, Optional, Tuple, Union
3
+ import numpy as np
4
+ from base import BaseConfig
5
+
6
+
7
@dataclass
class ImageConfig(BaseConfig):
    """Configuration for global image-related settings"""

    # Region of interest -- presumably (x_min, y_min, x_max, y_max), matching
    # the unpacking order in BaseDetectionPipeline._apply_roi; TODO confirm.
    roi: Optional[np.ndarray] = field(default_factory=lambda: np.array([500, 500, 5300, 4000]))
    # JSON file with text/symbol bounding boxes used for masking.
    mask_json_path: str = "./text_and_symbol_bboxes.json"
    # Whether annotated output images are written alongside results.
    save_annotations: bool = True
    # Drawing options for annotations; color tuples are 3-channel --
    # NOTE(review): BGR vs RGB order depends on the drawing code, confirm.
    annotation_style: Dict = field(default_factory=lambda: {
        'bbox_color': (255, 0, 0),
        'line_color': (0, 255, 0),
        'text_color': (0, 0, 255),
        'thickness': 2,
        'font_scale': 0.6
    })
+ })
21
+
22
@dataclass
class SymbolConfig(BaseConfig):
    """Configuration for Symbol Detection"""
    # Default weights file used when a single model is run.
    model_path: str = "models/symbol_detection.pt"
    # Minimum score for a detection to be kept.
    confidence_threshold: float = 0.5
    # Threshold for non-maximum suppression -- presumably IoU; confirm.
    nms_threshold: float = 0.3
    # Accepted symbol bounding-box size range in pixels -- TODO confirm
    # (width, height) order against the detector implementation.
    min_size: Tuple[int, int] = (10, 10)
    max_size: Tuple[int, int] = (200, 200)
    class_names: List[str] = field(default_factory=lambda: ["background", "valve", "pump", "sensor"])

    # Optional: Keep the multiple thresholds for experimentation
    confidence_thresholds: List[float] = field(default_factory=lambda: [0.1, 0.3, 0.5, 0.7, 0.9])
    # Two-model setup: per the README, SDM_41 targets equipment/large
    # symbols and SDM_20 targets instruments/small symbols.
    model_paths: Dict[str, str] = field(default_factory=lambda: {
        "model1": "models/Intui_SDM_41.pt",
        "model2": "models/Intui_SDM_20.pt"
    })
+ })
38
+
39
+
40
+
41
@dataclass
class TagConfig(BaseConfig):
    """Configuration for Tag Detection with OCR"""
    # Detection model/config file for tag regions.
    model_path: str = "models/tag_detection.json"
    # Minimum score for a tag detection to be kept.
    confidence_threshold: float = 0.5
    # IoU threshold for merging/suppressing overlapping boxes -- confirm.
    iou_threshold: float = 0.4
    # OCR backends combined for text recognition.
    ocr_engines: List[str] = field(default_factory=lambda: ['tesseract', 'easyocr', 'doctr'])
    # Regexes used to classify recognized text into P&ID tag categories.
    text_patterns: Dict[str, str] = field(default_factory=lambda: {
        'Line_Number': r"\d{1,5}-[A-Z]{2,4}-\d{1,3}",
        'Equipment_Tag': r"[A-Z]{1,3}-[A-Z0-9]{1,4}-\d{1,3}",
        'Instrument_Tag': r"\d{2,3}-[A-Z]{2,4}-\d{2,3}",
        'Valve_Number': r"[A-Z]{1,2}-\d{3}",
        'Pipe_Size': r"\d{1,2}\"",
        'Flow_Direction': r"FROM|TO",
        'Service_Description': r"STEAM|WATER|AIR|GAS|DRAIN",
        'Process_Instrument': r"\d{2,3}(?:-[A-Z]{2,3})?-\d{2,3}|[A-Z]{2,3}-\d{2,3}",
        'Nozzle': r"N[0-9]{1,2}|MH",
        'Pipe_Connector': r"[0-9]{1,5}|[A-Z]{1,2}[0-9]{2,5}"
    })
    # Tesseract flags: OEM 3 = default engine, PSM 11 = sparse text.
    tesseract_config: str = r'--oem 3 --psm 11'
    # Keyword arguments for EasyOCR -- presumably forwarded to readtext();
    # TODO confirm against the OCR module.
    easyocr_params: Dict = field(default_factory=lambda: {
        'paragraph': False,
        'height_ths': 2.0,
        'width_ths': 2.0,
        'contrast_ths': 0.2
    })
+ })
67
+
68
@dataclass
class LineConfig(BaseConfig):
    """Configuration for Line Detection"""

    # Max distance for associating points with a line -- presumably pixels;
    # TODO confirm against the line-detection module.
    threshold_distance: float = 10.0
    # Multiplier used to expand regions/boxes during matching -- confirm.
    expansion_factor: float = 1.1
74
+
75
+
76
@dataclass
class PointConfig(BaseConfig):
    """Configuration for Point Detection"""

    # Max distance for matching points -- presumably pixels; TODO confirm.
    threshold_distance: float = 10.0
81
+
82
+
83
@dataclass
class JunctionConfig(BaseConfig):
    """Configuration for Junction Detection"""

    # Size of the local window examined around a candidate junction --
    # presumably pixels; TODO confirm.
    window_size: int = 21
    # Search radius around a junction point -- presumably pixels; confirm.
    radius: int = 5
    # Accepted angle range between intersecting lines, in degrees
    # (lb = lower bound, ub = upper bound).
    angle_threshold_lb: float = 15.0
    angle_threshold_ub: float = 75.0
91
+
92
+ # @dataclass
93
+ # class JunctionConfig:
94
+ # radius: int = 5
95
+ # angle_threshold: float = 25.0
96
+ # colinear_threshold: float = 5.0
97
+ # connection_threshold: float = 5.0
data_aggregation_ai.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import json
3
+ import logging
4
+ from datetime import datetime
5
+ from typing import List, Dict, Optional, Tuple
6
+ from storage import StorageFactory
7
+ import uuid
8
+ import traceback
9
+ import os
10
+ import cv2
11
+ import numpy as np
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
356
+
357
+ if __name__ == "__main__":
358
+ import os
359
+ from pprint import pprint
360
+
361
+ # Initialize the aggregator
362
+ aggregator = DataAggregator()
363
+
364
+ # Test paths using actual files in results folder
365
+ results_dir = "results"
366
+ base_name = "002_page_1"
367
+
368
+ # Input paths
369
+ symbols_path = os.path.join(results_dir, f"{base_name}_detected_symbols.json")
370
+ texts_path = os.path.join(results_dir, f"{base_name}_detected_texts.json")
371
+ lines_path = os.path.join(results_dir, f"{base_name}_detected_lines.json")
372
+
373
+ # Verify files exist
374
+ print(f"\nChecking input files:")
375
+ print(f"Symbols file exists: {os.path.exists(symbols_path)}")
376
+ print(f"Texts file exists: {os.path.exists(texts_path)}")
377
+ print(f"Lines file exists: {os.path.exists(lines_path)}")
378
+
379
+ try:
380
+ # Process the data
381
+ print("\nProcessing data...")
382
+ result = aggregator.process_data(
383
+ image_path=os.path.join(results_dir, f"{base_name}.png"),
384
+ output_dir=results_dir,
385
+ symbols_path=symbols_path,
386
+ texts_path=texts_path,
387
+ lines_path=lines_path
388
+ )
389
+
390
+ # Verify output files
391
+ aggregated_json = os.path.join(results_dir, f"{base_name}_aggregated.json")
392
+ aggregated_image = os.path.join(results_dir, f"{base_name}_aggregated.png")
393
+
394
+ print("\nChecking output files:")
395
+ print(f"Aggregated JSON exists: {os.path.exists(aggregated_json)}")
396
+ print(f"Aggregated image exists: {os.path.exists(aggregated_image)}")
397
+
398
+ # Load and print statistics from aggregated result
399
+ if os.path.exists(aggregated_json):
400
+ with open(aggregated_json, 'r') as f:
401
+ data = json.load(f)
402
+ print("\nAggregation Results:")
403
+ print(f"Number of Symbols: {len(data.get('symbols', []))}")
404
+ print(f"Number of Texts: {len(data.get('texts', []))}")
405
+ print(f"Number of Lines: {len(data.get('lines', []))}")
406
+ print(f"Number of Nodes: {len(data.get('nodes', []))}")
407
+ print(f"Number of Edges: {len(data.get('edges', []))}")
408
+
409
+ except Exception as e:
410
+ print(f"\nError during testing: {str(e)}")
411
+ traceback.print_exc()
detection_schema.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass, field
2
+ from typing import List, Optional, Tuple, Dict
3
+ import uuid
4
+ from enum import Enum
5
+ import json
6
+ import numpy as np
7
+
8
+ # ======================== Point ======================== #
9
+ class ConnectionType(Enum):
10
+ SOLID = "solid"
11
+ DASHED = "dashed"
12
+ PHANTOM = "phantom"
13
+
14
+ @dataclass
15
+ class Coordinates:
16
+ x: int
17
+ y: int
18
+
19
+ @dataclass
20
+ class BBox:
21
+ xmin: int
22
+ ymin: int
23
+ xmax: int
24
+ ymax: int
25
+
26
+ def width(self) -> int:
27
+ return self.xmax - self.xmin
28
+
29
+ def height(self) -> int:
30
+ return self.ymax - self.ymin
31
+
32
+ class JunctionType(str, Enum):
33
+ T = "T"
34
+ L = "L"
35
+ END = "END"
36
+
37
+ @dataclass
38
+ class Point:
39
+ coords: Coordinates
40
+ bbox: BBox
41
+ type: JunctionType
42
+ confidence: float = 1.0
43
+ id: str = field(default_factory=lambda: str(uuid.uuid4()))
44
+
45
+
46
+ # # ======================== Symbol ======================== #
47
+ # class SymbolType(Enum):
48
+ # VALVE = "valve"
49
+ # PUMP = "pump"
50
+ # SENSOR = "sensor"
51
+ # # Add others as needed
52
+ #
53
+ class ValveSubtype(Enum):
54
+ GATE = "gate"
55
+ GLOBE = "globe"
56
+ BUTTERFLY = "butterfly"
57
+ #
58
+ # @dataclass
59
+ # class Symbol:
60
+ # symbol_type: SymbolType
61
+ # bbox: BBox
62
+ # center: Coordinates
63
+ # connections: List[Point] = field(default_factory=list)
64
+ # subtype: Optional[ValveSubtype] = None
65
+ # id: str = field(default_factory=lambda: str(uuid.uuid4()))
66
+ # confidence: float = 0.95
67
+ # model_metadata: dict = field(default_factory=dict)
68
+
69
+
70
+ # ======================== Symbol ======================== #
71
+ class SymbolType(Enum):
72
+ VALVE = "valve"
73
+ PUMP = "pump"
74
+ SENSOR = "sensor"
75
+ OTHER = "other" # Added to handle unknown categories
76
+
77
+ @dataclass
78
+ class Symbol:
79
+ center: Coordinates
80
+ symbol_type: SymbolType = field(default=SymbolType.OTHER)
81
+ id: str = field(default_factory=lambda: str(uuid.uuid4()))
82
+ class_id: int = -1
83
+ original_label: str = ""
84
+ category: str = "" # e.g., "inst"
85
+ type: str = "" # e.g., "ind"
86
+ label: str = "" # e.g., "Solenoid_actuator"
87
+ bbox: BBox = None
88
+ confidence: float = 0.95
89
+ model_source: str = "" # e.g., "model2"
90
+ connections: List[Point] = field(default_factory=list)
91
+ subtype: Optional[ValveSubtype] = None
92
+ model_metadata: dict = field(default_factory=dict)
93
+
94
+ def __post_init__(self):
95
+ """
96
+ Handle any additional post-processing after initialization.
97
+ """
98
+ # Ensure bbox is a BBox object
99
+ if isinstance(self.bbox, list) and len(self.bbox) == 4:
100
+ self.bbox = BBox(*self.bbox)
101
+
102
+
103
+ # ======================== Line ======================== #
104
+ @dataclass
105
+ class LineStyle:
106
+ connection_type: ConnectionType
107
+ stroke_width: int = 2
108
+ color: str = "#000000" # CSS-style colors
109
+
110
+ @dataclass
111
+ class Line:
112
+ start: Point
113
+ end: Point
114
+ bbox: BBox
115
+ id: str = field(default_factory=lambda: str(uuid.uuid4()))
116
+ style: LineStyle = field(default_factory=lambda: LineStyle(ConnectionType.SOLID))
117
+ confidence: float = 0.90
118
+ topological_links: List[str] = field(default_factory=list) # Linked symbols/junctions
119
+
120
+
121
+ # ======================== Junction ======================== #
122
+ class JunctionType(str, Enum):
123
+ T = "T"
124
+ L = "L"
125
+ END = "END"
126
+
127
+ @dataclass
128
+ class JunctionProperties:
129
+ flow_direction: Optional[str] = None # "in", "out"
130
+ pressure: Optional[float] = None # kPa
131
+
132
+ @dataclass
133
+ class Junction:
134
+ center: Coordinates
135
+ junction_type: JunctionType
136
+ id: str = field(default_factory=lambda: str(uuid.uuid4()))
137
+ properties: JunctionProperties = field(default_factory=JunctionProperties)
138
+ connected_lines: List[str] = field(default_factory=list) # Line IDs
139
+
140
+
141
+ # # ======================== Tag ======================== #
142
+ # @dataclass
143
+ # class Tag:
144
+ # text: str
145
+ # bbox: BBox
146
+ # associated_element: str # ID of linked symbol/line
147
+ # id: str = field(default_factory=lambda: str(uuid.uuid4()))
148
+ # font_size: int = 12
149
+ # rotation: float = 0.0 # Degrees
150
+
151
+ @dataclass
152
+ class Tag:
153
+ text: str
154
+ bbox: BBox
155
+ confidence: float = 1.0
156
+ source: str = "" # e.g., "easyocr"
157
+ text_type: str = "Unknown" # e.g., "Unknown", could be something else later
158
+ id: str = field(default_factory=lambda: str(uuid.uuid4()))
159
+ associated_element: Optional[str] = None # ID of linked symbol/line (can be None)
160
+ font_size: int = 12
161
+ rotation: float = 0.0 # Degrees
162
+
163
+ def __post_init__(self):
164
+ """
165
+ Ensure bbox is properly converted.
166
+ """
167
+ if isinstance(self.bbox, list) and len(self.bbox) == 4:
168
+ self.bbox = BBox(*self.bbox)
169
+
170
+ # ----------------------------
171
+ # DETECTION CONTEXT
172
+ # ----------------------------
173
+
174
+ @dataclass
175
+ class DetectionContext:
176
+ """
177
+ In-memory container for all detected elements (lines, points, symbols, junctions, tags).
178
+ Each element is stored in a dict keyed by 'id' for quick lookup and update.
179
+ """
180
+ lines: Dict[str, Line] = field(default_factory=dict)
181
+ points: Dict[str, Point] = field(default_factory=dict)
182
+ symbols: Dict[str, Symbol] = field(default_factory=dict)
183
+ junctions: Dict[str, Junction] = field(default_factory=dict)
184
+ tags: Dict[str, Tag] = field(default_factory=dict)
185
+
186
+ # -------------------------
187
+ # 1) ADD / GET / REMOVE
188
+ # -------------------------
189
+ def add_line(self, line: Line) -> None:
190
+ self.lines[line.id] = line
191
+
192
+ def get_line(self, line_id: str) -> Optional[Line]:
193
+ return self.lines.get(line_id)
194
+
195
+ def remove_line(self, line_id: str) -> None:
196
+ self.lines.pop(line_id, None)
197
+
198
+ def add_point(self, point: Point) -> None:
199
+ self.points[point.id] = point
200
+
201
+ def get_point(self, point_id: str) -> Optional[Point]:
202
+ return self.points.get(point_id)
203
+
204
+ def remove_point(self, point_id: str) -> None:
205
+ self.points.pop(point_id, None)
206
+
207
+ def add_symbol(self, symbol: Symbol) -> None:
208
+ self.symbols[symbol.id] = symbol
209
+
210
+ def get_symbol(self, symbol_id: str) -> Optional[Symbol]:
211
+ return self.symbols.get(symbol_id)
212
+
213
+ def remove_symbol(self, symbol_id: str) -> None:
214
+ self.symbols.pop(symbol_id, None)
215
+
216
+ def add_junction(self, junction: Junction) -> None:
217
+ self.junctions[junction.id] = junction
218
+
219
+ def get_junction(self, junction_id: str) -> Optional[Junction]:
220
+ return self.junctions.get(junction_id)
221
+
222
+ def remove_junction(self, junction_id: str) -> None:
223
+ self.junctions.pop(junction_id, None)
224
+
225
+ def add_tag(self, tag: Tag) -> None:
226
+ self.tags[tag.id] = tag
227
+
228
+ def get_tag(self, tag_id: str) -> Optional[Tag]:
229
+ return self.tags.get(tag_id)
230
+
231
+ def remove_tag(self, tag_id: str) -> None:
232
+ self.tags.pop(tag_id, None)
233
+
234
+ # -------------------------
235
+ # 2) SERIALIZATION: to_dict / from_dict
236
+ # -------------------------
237
+ def to_dict(self) -> dict:
238
+ """Convert all stored objects into a JSON-serializable dictionary."""
239
+ return {
240
+ "lines": [self._line_to_dict(line) for line in self.lines.values()],
241
+ "points": [self._point_to_dict(pt) for pt in self.points.values()],
242
+ "symbols": [self._symbol_to_dict(sym) for sym in self.symbols.values()],
243
+ "junctions": [self._junction_to_dict(jn) for jn in self.junctions.values()],
244
+ "tags": [self._tag_to_dict(tg) for tg in self.tags.values()]
245
+ }
246
+
247
+ @classmethod
248
+ def from_dict(cls, data: dict) -> "DetectionContext":
249
+ """
250
+ Create a new DetectionContext from a dictionary structure (e.g. loaded from JSON).
251
+ """
252
+ context = cls()
253
+
254
+ # Points
255
+ for pt_dict in data.get("points", []):
256
+ pt_obj = cls._point_from_dict(pt_dict)
257
+ context.add_point(pt_obj)
258
+
259
+ # Lines
260
+ for ln_dict in data.get("lines", []):
261
+ ln_obj = cls._line_from_dict(ln_dict)
262
+ context.add_line(ln_obj)
263
+
264
+ # Symbols
265
+ for sym_dict in data.get("symbols", []):
266
+ sym_obj = cls._symbol_from_dict(sym_dict)
267
+ context.add_symbol(sym_obj)
268
+
269
+ # Junctions
270
+ for jn_dict in data.get("junctions", []):
271
+ jn_obj = cls._junction_from_dict(jn_dict)
272
+ context.add_junction(jn_obj)
273
+
274
+ # Tags
275
+ for tg_dict in data.get("tags", []):
276
+ tg_obj = cls._tag_from_dict(tg_dict)
277
+ context.add_tag(tg_obj)
278
+
279
+ return context
280
+
281
+ # -------------------------
282
+ # 3) HELPER METHODS FOR (DE)SERIALIZATION
283
+ # -------------------------
284
+ @staticmethod
285
+ def _bbox_to_dict(bbox: BBox) -> dict:
286
+ return {
287
+ "xmin": bbox.xmin,
288
+ "ymin": bbox.ymin,
289
+ "xmax": bbox.xmax,
290
+ "ymax": bbox.ymax
291
+ }
292
+
293
+ @staticmethod
294
+ def _bbox_from_dict(d: dict) -> BBox:
295
+ return BBox(
296
+ xmin=d["xmin"],
297
+ ymin=d["ymin"],
298
+ xmax=d["xmax"],
299
+ ymax=d["ymax"]
300
+ )
301
+
302
+ @staticmethod
303
+ def _coords_to_dict(coords: Coordinates) -> dict:
304
+ return {
305
+ "x": coords.x,
306
+ "y": coords.y
307
+ }
308
+
309
+ @staticmethod
310
+ def _coords_from_dict(d: dict) -> Coordinates:
311
+ return Coordinates(x=d["x"], y=d["y"])
312
+
313
+ @staticmethod
314
+ def _line_style_to_dict(style: LineStyle) -> dict:
315
+ return {
316
+ "connection_type": style.connection_type.value,
317
+ "stroke_width": style.stroke_width,
318
+ "color": style.color
319
+ }
320
+
321
+ @staticmethod
322
+ def _line_style_from_dict(d: dict) -> LineStyle:
323
+ return LineStyle(
324
+ connection_type=ConnectionType(d["connection_type"]),
325
+ stroke_width=d.get("stroke_width", 2),
326
+ color=d.get("color", "#000000")
327
+ )
328
+
329
+ @staticmethod
330
+ def _point_to_dict(pt: Point) -> dict:
331
+ return {
332
+ "id": pt.id,
333
+ "coords": DetectionContext._coords_to_dict(pt.coords),
334
+ "bbox": DetectionContext._bbox_to_dict(pt.bbox),
335
+ "type": pt.type.value,
336
+ "confidence": pt.confidence
337
+ }
338
+
339
+ @staticmethod
340
+ def _point_from_dict(d: dict) -> Point:
341
+ return Point(
342
+ id=d["id"],
343
+ coords=DetectionContext._coords_from_dict(d["coords"]),
344
+ bbox=DetectionContext._bbox_from_dict(d["bbox"]),
345
+ type=JunctionType(d["type"]),
346
+ confidence=d.get("confidence", 1.0)
347
+ )
348
+
349
+ @staticmethod
350
+ def _line_to_dict(ln: Line) -> dict:
351
+ return {
352
+ "id": ln.id,
353
+ "start": DetectionContext._point_to_dict(ln.start),
354
+ "end": DetectionContext._point_to_dict(ln.end),
355
+ "bbox": DetectionContext._bbox_to_dict(ln.bbox),
356
+ "style": DetectionContext._line_style_to_dict(ln.style),
357
+ "confidence": ln.confidence,
358
+ "topological_links": ln.topological_links
359
+ }
360
+
361
+ @staticmethod
362
+ def _line_from_dict(d: dict) -> Line:
363
+ return Line(
364
+ id=d["id"],
365
+ start=DetectionContext._point_from_dict(d["start"]),
366
+ end=DetectionContext._point_from_dict(d["end"]),
367
+ bbox=DetectionContext._bbox_from_dict(d["bbox"]),
368
+ style=DetectionContext._line_style_from_dict(d["style"]),
369
+ confidence=d.get("confidence", 0.90),
370
+ topological_links=d.get("topological_links", [])
371
+ )
372
+
373
+ @staticmethod
374
+ def _symbol_to_dict(sym: Symbol) -> dict:
375
+ return {
376
+ "id": sym.id,
377
+ "symbol_type": sym.symbol_type.value,
378
+ "bbox": DetectionContext._bbox_to_dict(sym.bbox),
379
+ "center": DetectionContext._coords_to_dict(sym.center),
380
+ "connections": [DetectionContext._point_to_dict(p) for p in sym.connections],
381
+ "subtype": sym.subtype.value if sym.subtype else None,
382
+ "confidence": sym.confidence,
383
+ "model_metadata": sym.model_metadata
384
+ }
385
+
386
+ @staticmethod
387
+ def _symbol_from_dict(d: dict) -> Symbol:
388
+ return Symbol(
389
+ id=d["id"],
390
+ symbol_type=SymbolType(d["symbol_type"]),
391
+ bbox=DetectionContext._bbox_from_dict(d["bbox"]),
392
+ center=DetectionContext._coords_from_dict(d["center"]),
393
+ connections=[DetectionContext._point_from_dict(p) for p in d.get("connections", [])],
394
+ subtype=ValveSubtype(d["subtype"]) if d.get("subtype") else None,
395
+ confidence=d.get("confidence", 0.95),
396
+ model_metadata=d.get("model_metadata", {})
397
+ )
398
+
399
+ @staticmethod
400
+ def _junction_props_to_dict(props: JunctionProperties) -> dict:
401
+ return {
402
+ "flow_direction": props.flow_direction,
403
+ "pressure": props.pressure
404
+ }
405
+
406
+ @staticmethod
407
+ def _junction_props_from_dict(d: dict) -> JunctionProperties:
408
+ return JunctionProperties(
409
+ flow_direction=d.get("flow_direction"),
410
+ pressure=d.get("pressure")
411
+ )
412
+
413
+ @staticmethod
414
+ def _junction_to_dict(jn: Junction) -> dict:
415
+ return {
416
+ "id": jn.id,
417
+ "center": DetectionContext._coords_to_dict(jn.center),
418
+ "junction_type": jn.junction_type.value,
419
+ "properties": DetectionContext._junction_props_to_dict(jn.properties),
420
+ "connected_lines": jn.connected_lines
421
+ }
422
+
423
+ @staticmethod
424
+ def _junction_from_dict(d: dict) -> Junction:
425
+ return Junction(
426
+ id=d["id"],
427
+ center=DetectionContext._coords_from_dict(d["center"]),
428
+ junction_type=JunctionType(d["junction_type"]),
429
+ properties=DetectionContext._junction_props_from_dict(d["properties"]),
430
+ connected_lines=d.get("connected_lines", [])
431
+ )
432
+
433
+ @staticmethod
434
+ def _tag_to_dict(tg: Tag) -> dict:
435
+ return {
436
+ "id": tg.id,
437
+ "text": tg.text,
438
+ "bbox": DetectionContext._bbox_to_dict(tg.bbox),
439
+ "associated_element": tg.associated_element,
440
+ "font_size": tg.font_size,
441
+ "rotation": tg.rotation
442
+ }
443
+
444
+ @staticmethod
445
+ def _tag_from_dict(d: dict) -> Tag:
446
+ return Tag(
447
+ id=d["id"],
448
+ text=d["text"],
449
+ bbox=DetectionContext._bbox_from_dict(d["bbox"]),
450
+ associated_element=d["associated_element"],
451
+ font_size=d.get("font_size", 12),
452
+ rotation=d.get("rotation", 0.0)
453
+ )
454
+
455
+ # -------------------------
456
+ # 4) OPTIONAL UTILS
457
+ # -------------------------
458
+ def to_json(self, indent: int = 2) -> str:
459
+ """Convert context to JSON, ensuring dataclasses and numpy types are handled correctly."""
460
+ return json.dumps(self.to_dict(), default=self._json_serializer, indent=indent)
461
+
462
+ @staticmethod
463
+ def _json_serializer(obj):
464
+ """Handles numpy types and unknown objects for JSON serialization."""
465
+ if isinstance(obj, np.integer):
466
+ return int(obj)
467
+ if isinstance(obj, np.floating):
468
+ return float(obj)
469
+ if isinstance(obj, np.ndarray):
470
+ return obj.tolist() # Convert arrays to lists
471
+ if isinstance(obj, Enum):
472
+ return obj.value # Convert Enums to string values
473
+ if hasattr(obj, "__dict__"):
474
+ return obj.__dict__ # Convert dataclass objects to dict
475
+ raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
476
+
477
+ @classmethod
478
+ def from_json(cls, json_str: str) -> "DetectionContext":
479
+ """Load DetectionContext from a JSON string."""
480
+ data = json.loads(json_str)
481
+ return cls.from_dict(data)
detection_utils.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from typing import List, Tuple
3
+ import math
4
+
5
def robust_merge_lines(lines: List[Tuple[float, float, float, float]],
                       angle_thresh: float = 5.0,
                       dist_thresh: float = 5.0) -> List[Tuple[float, float, float, float]]:
    """
    Merge similar line segments using angle and distance thresholds.

    Two segments are grouped when their orientations differ by less than
    ``angle_thresh`` (modulo 180 degrees) and at least one pair of endpoints
    lies within ``dist_thresh``. Endpoint proximity is checked in BOTH
    endpoint orders (fix: the original only compared start<->start and
    end<->end, so segments stored with reversed endpoints never merged).
    Each group is replaced by the segment spanning the extreme projections
    of all its endpoints along the group's reference direction.

    Args:
        lines: List of line segments [(x1, y1, x2, y2), ...]
        angle_thresh: Maximum angle difference in degrees.
        dist_thresh: Maximum endpoint distance in pixels.

    Returns:
        List of merged line segments as (x1, y1, x2, y2) tuples.
    """
    if not lines:
        return []

    lines = np.array(lines)

    # Undirected orientation of every segment, folded into [0, 180).
    angles = np.degrees(np.arctan2(lines[:, 3] - lines[:, 1],
                                   lines[:, 2] - lines[:, 0])) % 180

    merged = []
    used = set()

    for i, line1 in enumerate(lines):
        if i in used:
            continue

        # Collect all not-yet-used segments similar to segment i.
        # Segment i itself always qualifies (zero angle/distance), so
        # `similar` is never empty when we reach the merge step.
        similar = []
        for j, line2 in enumerate(lines):
            if j in used:
                continue

            # Angle difference on the circle of undirected orientations.
            angle_diff = abs(angles[i] - angles[j])
            angle_diff = min(angle_diff, 180 - angle_diff)
            if angle_diff > angle_thresh:
                continue

            # Endpoint proximity: try both endpoint pairings so that
            # reversed segments (end point stored first) still match.
            d_ss = np.linalg.norm(line1[:2] - line2[:2])
            d_ee = np.linalg.norm(line1[2:] - line2[2:])
            d_se = np.linalg.norm(line1[:2] - line2[2:])
            d_es = np.linalg.norm(line1[2:] - line2[:2])
            if min(d_ss, d_ee, d_se, d_es) > dist_thresh:
                continue

            similar.append(j)
            used.add(j)

        if similar:
            # Span the whole group: project every endpoint onto the group's
            # reference direction and keep the two extreme points.
            points = lines[similar].reshape(-1, 2)
            direction = np.array([np.cos(np.radians(angles[i])),
                                  np.sin(np.radians(angles[i]))])
            proj = points @ direction
            merged_line = np.concatenate([points[np.argmin(proj)],
                                          points[np.argmax(proj)]])
            merged.append(tuple(merged_line))

    return merged
78
+
79
def compute_line_angle(x1: float, y1: float, x2: float, y2: float) -> float:
    """Return the orientation of segment (x1, y1)->(x2, y2) in degrees, in [0, 180)."""
    raw_degrees = math.degrees(math.atan2(y2 - y1, x2 - x1))
    return raw_degrees % 180
detectors.py ADDED
@@ -0,0 +1,733 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import math
3
+ import torch
4
+ import cv2
5
+ import numpy as np
6
+ from typing import List, Optional, Tuple, Dict
7
+ from dataclasses import replace
8
+ from math import sqrt
9
+ import json
10
+ import uuid
11
+ from pathlib import Path
12
+
13
+ # Base classes and utilities
14
+ from base import BaseDetector
15
+ from detection_schema import DetectionContext
16
+ from utils import DebugHandler
17
+ from config import SymbolConfig, TagConfig, LineConfig, PointConfig, JunctionConfig
18
+
19
+ # DeepLSD model for line detection
20
+ from deeplsd.models.deeplsd_inference import DeepLSD
21
+ from ultralytics import YOLO
22
+
23
+ # Detection schema: dataclasses for different objects
24
+ from detection_schema import (
25
+ BBox,
26
+ Coordinates,
27
+ Point,
28
+ Line,
29
+ Symbol,
30
+ Tag,
31
+ SymbolType,
32
+ LineStyle,
33
+ ConnectionType,
34
+ JunctionType,
35
+ Junction
36
+ )
37
+
38
+ # Skeletonization and label processing for junction detection
39
+ from skimage.morphology import skeletonize
40
+ from skimage.measure import label
41
+
42
+
43
+ import os
44
+ import cv2
45
+ import torch
46
+ import numpy as np
47
+ from dataclasses import replace
48
+ from typing import List, Optional
49
+ from detection_utils import robust_merge_lines
50
+
51
+
52
class LineDetector(BaseDetector):
    """
    DeepLSD-based line detection with patch-based tiling and global merging.

    Pipeline: skeletonize the input, optionally mask regions out, run DeepLSD
    on overlapping 512x512 patches, remap patch-local segments to global
    coordinates, then merge near-duplicate segments into Line objects.
    """

    def __init__(self,
                 config: LineConfig,
                 model_path: str,
                 model_config: dict,
                 device: torch.device,
                 debug_handler: DebugHandler = None):
        """
        Args:
            config: Line-detection configuration.
            model_path: Path to the DeepLSD checkpoint file.
            model_config: Dict of DeepLSD model hyperparameters.
            device: Requested torch device.
                NOTE(review): this argument is ignored — the device is
                re-selected below (mps > cuda > cpu); confirm that is intended.
            debug_handler: Optional handler for saving debug artifacts.
        """
        super().__init__(config, debug_handler)

        # Fix device selection for Apple Silicon: prefer MPS, then CUDA, then CPU.
        if torch.backends.mps.is_available():
            self.device = torch.device("mps")
        elif torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        self.model_path = model_path
        self.model_config = model_config
        self.model = self._load_model(model_path)

        # Patch parameters: tile size and overlap between neighboring tiles.
        self.patch_size = 512
        self.overlap = 10

        # Merging thresholds used by robust_merge_lines.
        self.angle_thresh = 5.0  # degrees
        self.dist_thresh = 5.0  # pixels

    def _preprocess(self, image: np.ndarray) -> np.ndarray:
        """Thicken strokes, skeletonize, and re-thicken to a clean 1px-ish line image.

        Assumes a light-background drawing where strokes are dark
        (the image is inverted before skeletonization) — TODO confirm.
        """
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        dilated = cv2.dilate(image, kernel, iterations=2)

        # Invert so strokes become foreground for skeletonize (expects 0/1 input).
        skeleton = cv2.bitwise_not(dilated)
        skeleton = skeletonize(skeleton // 255)
        skeleton = (skeleton * 255).astype(np.uint8)
        # NOTE(review): a 1x1 rectangular kernel makes this dilation a no-op
        # regardless of iterations — confirm a larger kernel was intended.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
        clean_image = cv2.dilate(skeleton, kernel, iterations=5)

        self.debug_handler.save_artifact(name="skeleton", data=clean_image, extension="png")

        return clean_image

    def _postprocess(self, image: np.ndarray) -> np.ndarray:
        # NOTE(review): returns None while sibling detectors return the image —
        # confirm callers never use this detector's postprocess output.
        return None
    # -------------------------------------
    # 1) Load Model
    # -------------------------------------
    def _load_model(self, model_path: str) -> DeepLSD:
        """Load the DeepLSD checkpoint onto self.device in eval mode.

        Raises:
            FileNotFoundError: If the checkpoint file does not exist.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model file not found: {model_path}")
        ckpt = torch.load(model_path, map_location=self.device)
        model = DeepLSD(self.model_config)
        model.load_state_dict(ckpt["model"])
        return model.to(self.device).eval()

    # -------------------------------------
    # 2) Main Detection Pipeline
    # -------------------------------------
    def detect(self,
               image: np.ndarray,
               context: DetectionContext,
               mask_coords: Optional[List[BBox]] = None,
               *args,
               **kwargs) -> None:
        """
        Detect lines in `image` and add them to `context`.

        Steps:
          - Optional mask + threshold
          - Tile into overlapping patches
          - For each patch => run DeepLSD => re-map lines to global coords
          - Merge lines robustly
          - Build final Line objects => add to context
        """
        mask_coords = mask_coords or []

        skeleton = self._preprocess(image)
        # (A) Optional mask + threshold if you want a binary
        # If your model expects grayscale or binary, do it here:
        processed_img = self._apply_mask_and_threshold(skeleton, mask_coords)
        # (B) Patch-based inference => collect raw lines in global coords
        all_lines = self._detect_in_patches(processed_img)

        # (C) Merge the lines in the global coordinate system
        merged_line_segments = robust_merge_lines(
            all_lines,
            angle_thresh=self.angle_thresh,
            dist_thresh=self.dist_thresh
        )

        # (D) Convert merged segments => final Line objects, add to context
        for (x1, y1, x2, y2) in merged_line_segments:
            line_obj = self._create_line_object(x1, y1, x2, y2)
            context.add_line(line_obj)

    # -------------------------------------
    # 3) Optional Mask + Threshold
    # -------------------------------------
    def _apply_mask_and_threshold(self, image: np.ndarray, mask_coords: List[BBox]) -> np.ndarray:
        """White out rectangular areas, then threshold to binary (if needed)."""
        masked = image.copy()
        for bbox in mask_coords:
            x1, y1 = int(bbox.xmin), int(bbox.ymin)
            x2, y2 = int(bbox.xmax), int(bbox.ymax)
            # Filled white rectangle erases content so no lines are detected there.
            cv2.rectangle(masked, (x1, y1), (x2, y2), (255, 255, 255), -1)

        # If image has 3 channels, convert to grayscale
        if len(masked.shape) == 3:
            masked_gray = cv2.cvtColor(masked, cv2.COLOR_BGR2GRAY)
        else:
            masked_gray = masked

        # Binary threshold (adjust threshold as needed)
        # If your model expects a plain grayscale, skip threshold
        binary_img = cv2.threshold(masked_gray, 127, 255, cv2.THRESH_BINARY)[1]
        return binary_img

    # -------------------------------------
    # 4) Patch-Based Inference
    # -------------------------------------
    def _detect_in_patches(self, processed_img: np.ndarray) -> List[tuple]:
        """
        Break the image into overlapping patches, run DeepLSD,
        map local lines => global coords, and return the global line list.
        """
        patch_size = self.patch_size
        overlap = self.overlap

        height, width = processed_img.shape[:2]
        step = patch_size - overlap

        all_lines = []

        for y in range(0, height, step):
            # Shift the last row of patches up so every patch is full-size
            # (when the image is large enough).
            patch_ymax = min(y + patch_size, height)
            patch_ymin = patch_ymax - patch_size if (patch_ymax - y) < patch_size else y
            if patch_ymin < 0: patch_ymin = 0

            for x in range(0, width, step):
                patch_xmax = min(x + patch_size, width)
                patch_xmin = patch_xmax - patch_size if (patch_xmax - x) < patch_size else x
                if patch_xmin < 0: patch_xmin = 0

                patch = processed_img[patch_ymin:patch_ymax, patch_xmin:patch_xmax]

                # Run model
                local_lines = self._run_model_inference(patch)

                # Convert local lines => global coords
                for ln in local_lines:
                    (x1_local, y1_local), (x2_local, y2_local) = ln

                    # offset by patch_xmin, patch_ymin
                    gx1 = x1_local + patch_xmin
                    gy1 = y1_local + patch_ymin
                    gx2 = x2_local + patch_xmin
                    gy2 = y2_local + patch_ymin

                    # Optional: clamp or filter lines partially out-of-bounds
                    if 0 <= gx1 < width and 0 <= gx2 < width and 0 <= gy1 < height and 0 <= gy2 < height:
                        all_lines.append((gx1, gy1, gx2, gy2))

        return all_lines

    # -------------------------------------
    # 5) Model Inference (Single Patch)
    # -------------------------------------
    def _run_model_inference(self, patch_img: np.ndarray) -> np.ndarray:
        """
        Run DeepLSD on a single patch (already masked/thresholded).
        patch_img shape: [patchH, patchW].
        Returns lines shape: [N, 2, 2].
        NOTE(review): annotated as np.ndarray but the value comes straight
        from the model output dict — confirm it is not a torch tensor.
        """
        # Convert patch to float32, add batch+channel dims, and scale to [0, 1].
        inp = torch.tensor(patch_img, dtype=torch.float32, device=self.device)[None, None] / 255.0
        with torch.no_grad():
            output = self.model({"image": inp})
            lines = output["lines"][0]  # shape (N, 2, 2)
        return lines

    # -------------------------------------
    # 6) Convert Merged Segments => Line Objects
    # -------------------------------------
    def _create_line_object(self, x1: float, y1: float, x2: float, y2: float) -> Line:
        """
        Create a minimal `Line` object from the final merged coordinates.

        Each endpoint gets a small (margin=2px) bounding box around it and is
        initially typed as an END junction; JunctionDetector may reclassify.
        """
        margin = 2
        # Start point
        start_pt = Point(
            coords=Coordinates(int(x1), int(y1)),
            bbox=BBox(
                xmin=int(x1 - margin),
                ymin=int(y1 - margin),
                xmax=int(x1 + margin),
                ymax=int(y1 + margin)
            ),
            type=JunctionType.END,
            confidence=1.0
        )
        # End point
        end_pt = Point(
            coords=Coordinates(int(x2), int(y2)),
            bbox=BBox(
                xmin=int(x2 - margin),
                ymin=int(y2 - margin),
                xmax=int(x2 + margin),
                ymax=int(y2 + margin)
            ),
            type=JunctionType.END,
            confidence=1.0
        )

        # Overall bounding box
        x_min = int(min(x1, x2))
        x_max = int(max(x1, x2))
        y_min = int(min(y1, y2))
        y_max = int(max(y1, y2))

        line_obj = Line(
            start=start_pt,
            end=end_pt,
            bbox=BBox(xmin=x_min, ymin=y_min, xmax=x_max, ymax=y_max),
            style=LineStyle(
                connection_type=ConnectionType.SOLID,
                stroke_width=2,
                color="#000000"
            ),
            confidence=0.9,
            topological_links=[]
        )
        return line_obj
287
+
288
class PointDetector(BaseDetector):
    """
    Unifies line endpoints that lie close together.

    Reads every line from the context, clusters endpoints falling within
    ``threshold_distance`` of each other, and rewrites the lines so that all
    endpoints of a cluster reference one shared Point object.
    """

    def __init__(self,
                 config: PointConfig,
                 debug_handler: DebugHandler = None):
        # No model is involved; this detector is pure geometry.
        super().__init__(config, debug_handler)
        self.threshold_distance = config.threshold_distance

    def _load_model(self, model_path: str):
        """No model needed for simple point unification."""
        return None

    def detect(self, image: np.ndarray, context: DetectionContext, *args, **kwargs) -> None:
        """
        Unify nearby line endpoints in `context`.

        1) Gather all line endpoints.
        2) Cluster them within ``threshold_distance``.
        3) Point every line at its cluster's canonical (first) endpoint and
           rebuild ``context.points`` from the surviving unique endpoints.
        """
        # Gather every endpoint of every line.
        endpoints = []
        for ln in context.lines.values():
            endpoints.extend((ln.start, ln.end))

        # Map each non-representative point id to the cluster representative
        # (the first point encountered in that cluster).
        remap = {}
        for cluster in self._cluster_points(endpoints, self.threshold_distance):
            representative = cluster[0]
            for member in cluster[1:]:
                remap[member.id] = representative

        # Rewire lines to the canonical points.
        for ln in context.lines.values():
            ln.start = remap.get(ln.start.id, ln.start)
            ln.end = remap.get(ln.end.id, ln.end)

        # Rebuild context.points from the unified endpoints so no stale
        # duplicates remain.
        unified = {}
        for ln in context.lines.values():
            unified[ln.start.id] = ln.start
            unified[ln.end.id] = ln.end
        context.points = unified

    def _preprocess(self, image: np.ndarray) -> np.ndarray:
        """No specific image preprocessing needed."""
        return image

    def _postprocess(self, image: np.ndarray) -> np.ndarray:
        """No specific image postprocessing needed."""
        return image

    # ----------------------
    # HELPER: clustering
    # ----------------------
    def _cluster_points(self, points: List[Point], threshold: float) -> List[List[Point]]:
        """
        Greedy single-pass clustering.

        Each point joins the first existing cluster whose FIRST member lies
        within ``threshold``; otherwise it starts a new cluster. Order of the
        input therefore influences the result (intentionally naive).
        """
        clusters = []
        for candidate in points:
            target = None
            for cluster in clusters:
                # The cluster's first point acts as its reference.
                if self._distance(candidate, cluster[0]) < threshold:
                    target = cluster
                    break
            if target is None:
                clusters.append([candidate])
            else:
                target.append(candidate)
        return clusters

    def _distance(self, p1: Point, p2: Point) -> float:
        """Euclidean distance between the coordinates of two points."""
        delta_x = p1.coords.x - p2.coords.x
        delta_y = p1.coords.y - p2.coords.y
        return sqrt(delta_x * delta_x + delta_y * delta_y)
394
+
395
+
396
class JunctionDetector(BaseDetector):
    """
    Classifies points as 'END', 'L', or 'T' by skeletonizing the binarized image
    and analyzing local connectivity. Also creates Junction objects in the context.
    """

    def __init__(self, config: JunctionConfig, debug_handler: DebugHandler = None):
        """
        Args:
            config: Junction configuration carrying the local-analysis window
                size, circle radius, and the L-angle acceptance band.
            debug_handler: Optional debug artifact handler; a default one is
                created when omitted.
        """
        super().__init__(config, debug_handler)  # no real model path
        self.window_size = config.window_size
        self.radius = config.radius
        self.angle_threshold_lb = config.angle_threshold_lb
        self.angle_threshold_ub = config.angle_threshold_ub
        self.debug_handler = debug_handler or DebugHandler()

    def _load_model(self, model_path: str):
        """Not loading any actual model, just skeleton logic."""
        return None

    def detect(self,
               image: np.ndarray,
               context: DetectionContext,
               *args,
               **kwargs) -> None:
        """
        1) Convert to binary & skeletonize
        2) Classify each point in the context
        3) Create a Junction for each point and store it in context.junctions
           (with 'connected_lines' referencing lines that share this point).
        """
        # 1) Preprocess -> skeleton
        skeleton = self._create_skeleton(image)

        # 2) Classify each point
        for pt in context.points.values():
            pt.type = self._classify_point(skeleton, pt)

        # 3) Create a Junction object for each point
        #    If you prefer only T or L, you can filter out END points.
        self._record_junctions_in_context(context)

    def _preprocess(self, image: np.ndarray) -> np.ndarray:
        """We might do thresholding; let's do a simple binary threshold."""
        if image.ndim == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
        _, bin_image = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
        return bin_image

    def _postprocess(self, image: np.ndarray) -> np.ndarray:
        """No postprocessing; returns the image unchanged."""
        return image

    def _create_skeleton(self, raw_image: np.ndarray) -> np.ndarray:
        """Skeletonize the binarized image (uint8, strokes as 255 on 0)."""
        bin_img = self._preprocess(raw_image)
        # For skeletonize, we need a boolean array; invert so dark strokes
        # become foreground.
        inv = cv2.bitwise_not(bin_img)
        inv_bool = (inv > 127).astype(np.uint8)
        skel = skeletonize(inv_bool).astype(np.uint8) * 255
        return skel

    def _classify_point(self, skeleton: np.ndarray, pt: Point) -> JunctionType:
        """
        Given a skeleton image, look around 'pt' in a local window
        to determine if it's an END, L, or T.

        The classification proxy is the number of connected skeleton regions
        inside a circle of ``self.radius`` around the point: 1 -> END,
        2 -> candidate L (angle-checked), 3 -> T.
        """
        classification = JunctionType.END  # default

        half_w = self.window_size // 2
        x, y = pt.coords.x, pt.coords.y

        # Clamp the analysis window to the image bounds.
        top = max(0, y - half_w)
        bottom = min(skeleton.shape[0], y + half_w + 1)
        left = max(0, x - half_w)
        right = min(skeleton.shape[1], x + half_w + 1)

        patch = (skeleton[top:bottom, left:right] > 127).astype(np.uint8)

        # create circular mask centered on the point (window-local coords)
        circle_mask = np.zeros_like(patch, dtype=np.uint8)
        local_cx = x - left
        local_cy = y - top
        cv2.circle(circle_mask, (local_cx, local_cy), self.radius, 1, -1)
        circle_skel = patch & circle_mask

        # label connected regions; the number of components approximates the
        # number of skeleton branches leaving the point
        labeled = label(circle_skel, connectivity=2)
        num_exits = labeled.max()

        if num_exits == 1:
            classification = JunctionType.END
        elif num_exits == 2:
            # check angle for L
            classification = self._check_angle_for_L(labeled)
        elif num_exits == 3:
            classification = JunctionType.T

        return classification

    def _check_angle_for_L(self, labeled_region: np.ndarray) -> JunctionType:
        """
        If the angle between two branches is within
        [angle_threshold_lb, angle_threshold_ub], it's 'L'.
        Otherwise default to END.

        NOTE(review): the coordinates below are the first two pixels of the
        region labeled 1 only, so the measured angle is a chord WITHIN one
        branch rather than the angle BETWEEN the two branches — confirm this
        is the intended heuristic.
        """
        coords = np.argwhere(labeled_region == 1)
        if len(coords) < 2:
            return JunctionType.END

        (y1, x1), (y2, x2) = coords[:2]
        dx = x2 - x1
        dy = y2 - y1
        angle = math.degrees(math.atan2(dy, dx))
        # Fold to the acute equivalent in [0, 90].
        acute_angle = min(abs(angle), 180 - abs(angle))

        if self.angle_threshold_lb <= acute_angle <= self.angle_threshold_ub:
            return JunctionType.L
        return JunctionType.END

    # -----------------------------------------
    # EXTRA STEP: Create Junction objects
    # -----------------------------------------
    def _record_junctions_in_context(self, context: DetectionContext):
        """
        Create a Junction object for each point in context.points.
        If you only want T/L points as junctions, filter them out.
        Also track any lines that connect to this point.
        """

        for pt in context.points.values():
            # If you prefer to store all points as junction, do it:
            # or if you want only T or L, do:
            # if pt.type in {JunctionType.T, JunctionType.L}: ...

            jn = Junction(
                center=pt.coords,
                junction_type=pt.type,
                # add more properties if needed
            )

            # find lines that connect to this point (by shared point id)
            connected_lines = []
            for ln in context.lines.values():
                if ln.start.id == pt.id or ln.end.id == pt.id:
                    connected_lines.append(ln.id)

            jn.connected_lines = connected_lines

            # add to context
            context.add_junction(jn)
546
+
547
+ import json
548
+ import uuid
549
+
550
class SymbolDetector(BaseDetector):
    """
    Placeholder detector: loads precomputed symbol detections from a JSON
    file and registers them on the context as Symbol objects.
    """

    def __init__(self,
                 config: SymbolConfig,
                 debug_handler: Optional[DebugHandler] = None,
                 symbol_json_path: str = "./symbols.json"):
        super().__init__(config=config, debug_handler=debug_handler)
        self.symbol_json_path = symbol_json_path

    def _load_model(self, model_path: str):
        """Not loading an actual model; symbol data is read from JSON."""
        return None

    def detect(self,
               image: np.ndarray,
               context: DetectionContext,
               *args,
               **kwargs) -> None:
        """
        Read precomputed symbol records from the configured JSON file and
        add a Symbol to `context` for each entry under the "detections" key.
        """
        records = self._load_json_data(self.symbol_json_path)
        if not records:
            return

        for entry in records.get("detections", []):
            context.add_symbol(self._parse_symbol_record(entry))

    def _preprocess(self, image: np.ndarray) -> np.ndarray:
        """No preprocessing; the image is unused."""
        return image

    def _postprocess(self, image: np.ndarray) -> np.ndarray:
        """No postprocessing; the image is unused."""
        return image

    # --------------
    # HELPER METHODS
    # --------------
    def _load_json_data(self, json_path: str) -> dict:
        """Load the JSON payload; record a debug artifact and return {} if missing."""
        if not os.path.exists(json_path):
            self.debug_handler.save_artifact(name="symbol_error",
                                             data=b"Missing symbol JSON file",
                                             extension="txt")
            return {}

        with open(json_path, "r", encoding="utf-8") as handle:
            return json.load(handle)

    def _parse_symbol_record(self, record: dict) -> Symbol:
        """Build a Symbol object from one JSON detection record."""
        xmin, ymin, xmax, ymax = record.get("bbox", [0, 0, 0, 0])
        bbox_obj = BBox(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)

        # Center of the bounding box (integer division keeps pixel coords).
        center_coords = Coordinates(
            x=(bbox_obj.xmin + bbox_obj.xmax) // 2,
            y=(bbox_obj.ymin + bbox_obj.ymax) // 2
        )

        return Symbol(
            id=record.get("symbol_id", ""),
            class_id=record.get("class_id", -1),
            original_label=record.get("original_label", ""),
            category=record.get("category", ""),
            type=record.get("type", ""),
            label=record.get("label", ""),
            bbox=bbox_obj,
            center=center_coords,
            confidence=record.get("confidence", 0.95),
            model_source=record.get("model_source", ""),
            connections=[]
        )
646
+
647
class TagDetector(BaseDetector):
    """
    Placeholder detector: loads precomputed tag (text) detections from a
    JSON file and registers them on the context as Tag objects.
    """

    def __init__(self,
                 config: TagConfig,
                 debug_handler: Optional[DebugHandler] = None,
                 tag_json_path: str = "./tags.json"):
        super().__init__(config=config, debug_handler=debug_handler)
        self.tag_json_path = tag_json_path

    def _load_model(self, model_path: str):
        """Not loading an actual model; tag data is read from JSON."""
        return None

    def detect(self,
               image: np.ndarray,
               context: DetectionContext,
               *args,
               **kwargs) -> None:
        """
        Read precomputed tag records from the configured JSON file and add a
        Tag to `context` for each entry under the "detections" key.
        """
        records = self._load_json_data(self.tag_json_path)
        if not records:
            return

        for entry in records.get("detections", []):
            context.add_tag(self._parse_tag_record(entry))

    def _preprocess(self, image: np.ndarray) -> np.ndarray:
        """No preprocessing; the image is unused."""
        return image

    def _postprocess(self, image: np.ndarray) -> np.ndarray:
        """No postprocessing; the image is unused."""
        return image

    # --------------
    # HELPER METHODS
    # --------------
    def _load_json_data(self, json_path: str) -> dict:
        """Load the JSON payload; record a debug artifact and return {} if missing."""
        if not os.path.exists(json_path):
            self.debug_handler.save_artifact(name="tag_error",
                                             data=b"Missing tag JSON file",
                                             extension="txt")
            return {}

        with open(json_path, "r", encoding="utf-8") as handle:
            return json.load(handle)

    def _parse_tag_record(self, record: dict) -> Tag:
        """Build a Tag object from one JSON detection record."""
        xmin, ymin, xmax, ymax = record.get("bbox", [0, 0, 0, 0])
        bbox_obj = BBox(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)

        return Tag(
            text=record.get("text", ""),
            bbox=bbox_obj,
            confidence=record.get("confidence", 1.0),
            source=record.get("source", ""),
            text_type=record.get("text_type", "Unknown"),
            # Records without an id get a fresh UUID so tags stay addressable.
            id=record.get("id", str(uuid.uuid4())),
            font_size=record.get("font_size", 12),
            rotation=record.get("rotation", 0.0)
        )
download_models.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import torch
4
+ from doctr.models import ocr_predictor
5
+ from ultralytics import YOLO
6
+ from deeplsd.models.deeplsd_inference import DeepLSD
7
+ from dotenv import load_dotenv
8
+
9
+ # Load environment variables
10
+ load_dotenv()
11
+
12
def copy_local_models():
    """Copy models from local directory to deployment"""
    # Destination layout expected by the app.
    for target_dir in ('models/yolo', 'models/deeplsd', 'models/doctr'):
        os.makedirs(target_dir, exist_ok=True)

    # Source paths (adjust these to your local paths)
    local_models_dir = "../models"

    # Models that are simply copied over when present locally.
    simple_copies = (
        ("yolov8n.pt", "models/yolo/yolov8n.pt", "YOLO"),
        ("deeplsd_md.tar", "models/deeplsd/deeplsd_md.tar", "DeepLSD"),
    )
    for src_name, dst, label in simple_copies:
        src = os.path.join(local_models_dir, src_name)
        if os.path.exists(src):
            shutil.copy2(src, dst)
            print(f"Copied {label} model to {dst}")

    # DocTR: copy if available locally, otherwise download pretrained weights.
    doctr_src = os.path.join(local_models_dir, "ocr_predictor.pt")
    doctr_dst = "models/doctr/ocr_predictor.pt"
    if os.path.exists(doctr_src):
        shutil.copy2(doctr_src, doctr_dst)
        print(f"Copied DocTR model to {doctr_dst}")
    else:
        # Download DocTR model if not available locally
        predictor = ocr_predictor(pretrained=True)
        torch.save(predictor.state_dict(), doctr_dst)
        print(f"Downloaded DocTR model to {doctr_dst}")
47
+
48
+ if __name__ == "__main__":
49
+ copy_local_models()
gradioChatApp.py ADDED
@@ -0,0 +1,806 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import gradio as gr
4
+ import json
5
+ from datetime import datetime
6
+ from symbol_detection import run_detection_with_optimal_threshold
7
+ from line_detection_ai import DiagramDetectionPipeline, LineDetector, LineConfig, ImageConfig, DebugHandler, \
8
+ PointConfig, JunctionConfig, PointDetector, JunctionDetector, SymbolConfig, SymbolDetector, TagConfig, TagDetector
9
+ from data_aggregation_ai import DataAggregator
10
+ from chatbot_agent import get_assistant_response
11
+ from storage import StorageFactory, LocalStorage
12
+ import traceback
13
+ from text_detection_combined import process_drawing
14
+ from pathlib import Path
15
+ from pdf_processor import DocumentProcessor
16
+ import networkx as nx
17
+ import logging
18
+ import matplotlib.pyplot as plt
19
+ from dotenv import load_dotenv
20
+ import torch
21
+ from graph_visualization import create_graph_visualization
22
+ import shutil
23
+ from detection_schema import BBox # Add this import
24
+ import cv2
25
+ import numpy as np
26
+ import time
27
+ from huggingface_hub import HfApi, login
28
+ from download_models import download_models, copy_local_models
29
+
30
# Load environment variables from .env file
load_dotenv()

# Configure logging at the start of the file.
# All module loggers inherit this root configuration.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Get logger for this module
logger = logging.getLogger(__name__)

# Disable duplicate logs from other modules by raising their
# per-logger threshold to WARNING (INFO/DEBUG from them is dropped).
logging.getLogger('PIL').setLevel(logging.WARNING)
logging.getLogger('matplotlib').setLevel(logging.WARNING)
logging.getLogger('gradio').setLevel(logging.WARNING)
logging.getLogger('networkx').setLevel(logging.WARNING)
logging.getLogger('line_detection_ai').setLevel(logging.WARNING)
logging.getLogger('symbol_detection').setLevel(logging.WARNING)
50
+
51
+
52
+ # Only log important messages
53
def log_process_step(message, level=logging.INFO):
    """Selectively log a processing step.

    Warnings and errors are always emitted at their given level; anything
    less severe is logged (at INFO) only when it reports a milestone,
    i.e. the text contains "completed" or "generated".
    """
    if level >= logging.WARNING:
        logger.log(level, message)
        return
    lowered = message.lower()
    if any(marker in lowered for marker in ("completed", "generated")):
        logger.info(message)
59
+
60
+
61
+ # Helper function to format timestamps
62
def get_timestamp():
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    return f"{datetime.now():%Y-%m-%d %H:%M:%S}"
64
+
65
+
66
def format_message(role, content):
    """Build a single chatbot-history entry in the messages format."""
    return dict(role=role, content=content)
69
+
70
+
71
# Load avatar images for agents.  Each avatar is read from the local
# assets directory and base64-encoded so chat_message() can embed it
# directly in the HTML as a data URI (no separate static file serving).
localStorage = LocalStorage()
agent_avatar = base64.b64encode(localStorage.load_file("assets/AiAgent.png")).decode()
llm_avatar = base64.b64encode(localStorage.load_file("assets/llm.png")).decode()
user_avatar = base64.b64encode(localStorage.load_file("assets/user.png")).decode()
76
+
77
+
78
+ # Chat message formatting with avatars and enhanced HTML for readability
79
def chat_message(role, message, avatar, timestamp):
    """Render one chat message as an HTML bubble with avatar and timestamp.

    A small subset of Markdown in *message* is converted to HTML:
    fenced/inline code, paired ``**bold**`` markers, ``#``/``##``/``###``
    headings, and newlines.

    Bug fixed: the original chained ``.replace()`` calls were broken —
    ``.replace("**", "<strong>")`` consumed every ``**`` so the follow-up
    ``.replace("**", "</strong>")`` never matched (same for backticks),
    and the ``"\\n1. "`` replacements were dead code because ``"\\n"`` had
    already been rewritten to ``"<br>"``.  Paired regex substitutions
    produce matching open/close tags instead.
    """
    import re

    # Code first, so its contents are not further transformed.
    formatted = re.sub(r"```(.*?)```", r"<pre><code>\1</code></pre>", message, flags=re.DOTALL)
    formatted = re.sub(r"`([^`]+)`", r"<code>\1</code>", formatted)
    # Paired bold markers.
    formatted = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", formatted)
    # Headings: longest marker first so "###" is not matched as "#".
    formatted = re.sub(r"^###\s*(.+)$", r"<h3>\1</h3>", formatted, flags=re.MULTILINE)
    formatted = re.sub(r"^##\s*(.+)$", r"<h2>\1</h2>", formatted, flags=re.MULTILINE)
    formatted = re.sub(r"^#\s*(.+)$", r"<h1>\1</h1>", formatted, flags=re.MULTILINE)
    # Newlines last, so the MULTILINE heading regexes above still see line starts.
    formatted_message = formatted.replace("\n", "<br>")

    return f"""
    <div class="chat-message {role}">
        <img src="data:image/png;base64,{avatar}" class="avatar"/>
        <div>
            <div class="speech-bubble {role}-bubble">{formatted_message}</div>
            <div class="timestamp">{timestamp}</div>
        </div>
    </div>
    """
102
+
103
+
104
def resize_to_fit(image_path, max_width=800, max_height=600):
    """Resize the image at *image_path* to fit within the given bounds.

    The aspect ratio is preserved; a single uniform scale factor is
    applied so both dimensions fit inside (max_width, max_height).

    Returns:
        (resized_image, scale) on success, or (None, 1.0) when the
        image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        return None, 1.0

    height, width = img.shape[:2]

    # Uniform scale that fits both dimensions inside the editor window.
    scale = min(max_width / width, max_height / height)

    # Always resize (even when scale > 1) so the image fills the editor.
    target_size = (int(width * scale), int(height * scale))
    return cv2.resize(img, target_size), scale
124
+
125
+
126
+ # Main processing function for P&ID steps
127
def process_pnid(image_file, progress=gr.Progress()):
    """Process a P&ID document end-to-end, yielding UI updates as it goes.

    Generator for the Gradio "Process Document" click handler.  Each
    ``yield`` emits a 9-element list matching the UI outputs:
    [progress text, original image, symbols image, tags image, lines
    image, aggregated image, graph image, chat history, aggregated-JSON
    path].  Pipeline: document conversion -> symbol detection -> text
    detection -> line detection -> aggregation -> knowledge graph.

    Raises:
        ValueError: when no file is uploaded or an intermediate artifact
            is missing/malformed; errors are also surfaced in the chat.
    """
    try:
        if image_file is None:
            raise ValueError("No file uploaded. Please upload a file first.")

        progress_text = []
        outputs = [None] * 9  # one slot per UI output (see docstring)
        base_name = os.path.splitext(os.path.basename(image_file.name))[0] + "_page_1"

        # Initialize chat history with the messages-format expected by gr.Chatbot
        chat_history = [{"role": "assistant", "content": "Welcome! Upload a P&ID to begin analysis."}]
        outputs[7] = chat_history  # Chat history lives at index 7

        def update_progress(step, message):
            # Append a timestamped line to the progress log and move the bar.
            progress_text.append(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - {message}")
            outputs[0] = "\n".join(progress_text)  # Progress text
            progress(step)

        # Initialize storage and results directory
        storage = StorageFactory.get_storage()
        results_dir = "results"
        os.makedirs(results_dir, exist_ok=True)

        # Clean results directory so artifacts from a previous run cannot
        # be mistaken for this run's output.
        logger.info("Cleaned results directory: results")
        for file in os.listdir(results_dir):
            file_path = os.path.join(results_dir, file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                logger.error(f"Error deleting file {file_path}: {str(e)}")

        # Step 1: File Upload (10%)
        logger.info(f"Processing file: {os.path.basename(image_file.name)}")
        update_progress(0.1, "Step 1/7: File uploaded successfully")
        yield outputs

        # Step 2: Document Processing - convert the upload (e.g. PDF) to a
        # high-quality PNG used by every downstream detector.
        update_progress(0.2, "Step 2/7: Processing document...")
        doc_processor = DocumentProcessor(storage)
        processed_pages = doc_processor.process_document(
            file_path=image_file,
            output_dir=results_dir
        )

        if not processed_pages:
            raise ValueError("No pages processed from document")

        # Only the first page is analyzed (base_name hard-codes "_page_1").
        high_quality_png = processed_pages[0]
        outputs[1] = high_quality_png  # P&ID tab shows original high quality
        update_progress(0.25, "Document loaded and displayed")
        yield outputs

        # Step 3: Symbol Detection using the high quality PNG
        detection_image_path, detection_json_path, _, diagram_bbox = run_detection_with_optimal_threshold(
            high_quality_png,
            results_dir=results_dir,
            file_name=os.path.basename(high_quality_png),
            storage=storage,
            resize_image=False  # keep full resolution for detection
        )
        outputs[2] = detection_image_path  # Symbols tab
        symbol_json_path = detection_json_path

        # Step 4: Text (tag) Detection using the high quality PNG
        text_results, text_summary = process_drawing(
            high_quality_png,
            results_dir,
            storage
        )
        text_json_path = text_results['json_path']
        outputs[3] = text_results['image_path']  # Tags tab

        # Step 5: Line Detection (80%)
        update_progress(0.80, "Step 5/7: Line Detection")
        yield outputs

        try:
            # Initialize components
            debug_handler = DebugHandler(enabled=True, storage=storage)

            # Configure detectors
            line_config = LineConfig()
            point_config = PointConfig()
            junction_config = JunctionConfig()
            symbol_config = SymbolConfig(
                model_path="models/Intui_SDM_41.pt",
                confidence_threshold=0.5,
                nms_threshold=0.3
            )
            tag_config = TagConfig(
                model_path="models/tag_detection.json",
                confidence_threshold=0.5
            )

            # Create all required detectors
            symbol_detector = SymbolDetector(
                config=symbol_config,
                debug_handler=debug_handler
            )

            tag_detector = TagDetector(
                config=tag_config,
                debug_handler=debug_handler
            )

            # NOTE(review): device is hard-coded to CUDA — this will fail
            # on CPU-only hosts; confirm the deployment always has a GPU.
            line_detector = LineDetector(
                config=line_config,
                model_path="models/deeplsd_md.tar",
                model_config={"detect_lines": True},
                device=torch.device("cuda"),
                debug_handler=debug_handler
            )

            point_detector = PointDetector(
                config=point_config,
                debug_handler=debug_handler
            )

            junction_detector = JunctionDetector(
                config=junction_config,
                debug_handler=debug_handler
            )

            # Create pipeline with all detectors
            pipeline = DiagramDetectionPipeline(
                tag_detector=tag_detector,
                symbol_detector=symbol_detector,
                line_detector=line_detector,
                point_detector=point_detector,
                junction_detector=junction_detector,
                storage=storage,
                debug_handler=debug_handler
            )

            # Run pipeline with the original high-res image
            line_results = pipeline.run(
                image_path=high_quality_png,
                output_dir=results_dir,
                config=ImageConfig()
            )
            line_json_path = line_results.json_path
            outputs[4] = line_results.image_path

            # Verify line detection output exists on disk
            if not os.path.exists(line_json_path):
                raise ValueError(f"Line detection JSON not found: {line_json_path}")

            # Verify line detection JSON content
            with open(line_json_path, 'r') as f:
                line_data = json.load(f)
                if 'lines' not in line_data:
                    raise ValueError(f"Invalid line detection data format in {line_json_path}")
                logger.info(f"Line detection completed successfully with {len(line_data['lines'])} lines")

            # Verify all required JSONs exist before aggregation
            required_jsons = {
                'symbols': symbol_json_path,
                'texts': text_json_path,
                'lines': line_json_path
            }

            for name, path in required_jsons.items():
                if not os.path.exists(path):
                    raise ValueError(f"{name} JSON not found: {path}")
                # Verify the JSON parses before handing it to the aggregator
                with open(path, 'r') as f:
                    data = json.load(f)
                    logger.info(f"Loaded {name} JSON with {len(data.get('detections', data.get('lines', [])))} items")

            # Data Aggregation: merge symbols, texts, and lines into one graph JSON
            aggregator = DataAggregator(storage=storage)
            aggregated_result = aggregator.process_data(
                image_path=high_quality_png,
                output_dir=results_dir,
                symbols_path=symbol_json_path,
                texts_path=text_json_path,
                lines_path=line_json_path
            )

            # Verify aggregation result before graph creation
            if not aggregated_result.get('success'):
                raise ValueError(f"Data aggregation failed: {aggregated_result.get('error')}")

            aggregated_json_path = aggregated_result['json_path']
            if not os.path.exists(aggregated_json_path):
                raise ValueError(f"Aggregated JSON not found: {aggregated_json_path}")

            # Verify aggregated JSON content has the keys the graph step needs
            with open(aggregated_json_path, 'r') as f:
                aggregated_data = json.load(f)
                required_keys = ['nodes', 'edges', 'symbols', 'texts', 'lines']
                missing_keys = [k for k in required_keys if k not in aggregated_data]
                if missing_keys:
                    raise ValueError(f"Aggregated JSON missing required keys: {missing_keys}")
                logger.info("Aggregation completed successfully with:")
                logger.info(f"- {len(aggregated_data['nodes'])} nodes")
                logger.info(f"- {len(aggregated_data['edges'])} edges")

            # After aggregation, create the knowledge-graph visualization
            update_progress(0.85, "Step 6/7: Creating Knowledge Graph")
            try:
                graph_results = create_graph_visualization(
                    json_path=aggregated_json_path,
                    output_dir=results_dir,
                    base_name=base_name,
                    save_plot=True
                )

                if not graph_results.get('success'):
                    logger.error(f"Error in graph generation: {graph_results.get('error')}")
                    raise Exception(graph_results.get('error'))

                # Path convention must match graph_visualization's output naming.
                graph_path = f"results/{base_name}_graph_visualization.png"
                if not os.path.exists(graph_path):
                    raise Exception("Graph visualization file not created")

                update_progress(0.90, "Step 6/7: Knowledge Graph Created")

            except Exception as e:
                logger.error(f"Error creating graph visualization: {str(e)}")
                raise

            # Final output assignments for all 9 UI slots.
            # NOTE(review): outputs[0] is set to the raw list here but is
            # overwritten with the joined string by update_progress below.
            outputs[0] = progress_text  # Progress text
            outputs[1] = high_quality_png  # P&ID
            outputs[2] = detection_image_path  # Symbols
            outputs[3] = text_results['image_path']  # Tags
            outputs[4] = line_results.image_path  # Lines
            outputs[5] = f"results/{base_name}_aggregated.png"  # Aggregated
            outputs[6] = graph_path  # Graph visualization
            outputs[7] = chat_history  # Chat
            outputs[8] = aggregated_json_path  # JSON state (enables the chatbot)

            # Update progress with all steps
            update_progress(0.95, "Step 7/7: Finalizing Results")
            chat_history = [{"role": "assistant", "content": "Processing complete! I can help answer questions about the P&ID contents."}]
            outputs[7] = chat_history

            update_progress(1.0, "βœ… Processing Complete")
            yield outputs

        except Exception as e:
            # Surface the error in the chat panel, then re-raise to Gradio
            chat_history = [{"role": "assistant", "content": f"Error during processing: {str(e)}"}]
            outputs[7] = chat_history
            raise

    except Exception as e:
        logger.error(f"Error in process_pnid: {str(e)}")
        logger.error(f"Stack trace:\n{traceback.format_exc()}")
        # Surface the error in the chat panel, then re-raise to Gradio
        chat_history = [{"role": "assistant", "content": f"Error: {str(e)}"}]
        outputs[7] = chat_history
        raise
386
+
387
+
388
+ # Separate function for Chat interaction
389
def handle_user_message(user_input, chat_history, json_path_state):
    """Handle a user chat message and append the assistant's reply.

    Args:
        user_input: Raw text typed by the user; blank input is ignored.
        chat_history: Existing list of rendered chat messages.
        json_path_state: Path to the aggregated-results JSON from processing,
            or None when no document has been processed yet.

    Returns:
        The updated chat history (a new list; the input list is not mutated).

    Bug fixed: chat_message() returns a rendered HTML *string*, but the
    original code concatenated it directly onto the history list
    (``list + str``), which raises TypeError on every call.  Each rendered
    message is now wrapped in a one-element list / appended explicitly.
    """
    try:
        if not user_input or not user_input.strip():
            return chat_history

        # Add the user's message to a fresh history list.
        timestamp = get_timestamp()
        new_history = chat_history + [chat_message("user", user_input, user_avatar, timestamp)]

        # A processed document is required before the assistant can answer.
        if not json_path_state or not os.path.exists(json_path_state):
            error_message = "Please upload and process a P&ID document first."
            return new_history + [chat_message("assistant", error_message, agent_avatar, get_timestamp())]

        try:
            # Log for debugging
            logger.info(f"Sending question to assistant: {user_input}")
            logger.info(f"Using JSON path: {json_path_state}")

            # Generate response
            response = get_assistant_response(user_input, json_path_state)

            # Normalize the response: plain values are stringified, a
            # generator yields its first item.
            if isinstance(response, (str, dict)):
                response_text = str(response)
            else:
                try:
                    response_text = next(response) if hasattr(response, '__next__') else str(response)
                except StopIteration:
                    response_text = "I apologize, but I couldn't generate a response."
                except Exception as e:
                    logger.error(f"Error processing response: {str(e)}")
                    response_text = "I apologize, but I encountered an error processing your request."

            logger.info(f"Generated response: {response_text}")

            if not response_text.strip():
                response_text = "I apologize, but I couldn't generate a response. Please try asking your question differently."

            # Add the assistant's reply to the history.
            new_history.append(chat_message("assistant", response_text, agent_avatar, get_timestamp()))

        except Exception as e:
            logger.error(f"Error generating response: {str(e)}")
            logger.error(traceback.format_exc())
            error_message = "I apologize, but I encountered an error processing your request. Please try again."
            new_history.append(chat_message("assistant", error_message, agent_avatar, get_timestamp()))

        return new_history

    except Exception as e:
        logger.error(f"Chat error: {str(e)}")
        logger.error(traceback.format_exc())
        return chat_history + [chat_message(
            "assistant",
            "I apologize, but something went wrong. Please try again.",
            agent_avatar,
            get_timestamp()
        )]
450
+
451
+
452
# Custom CSS for the dark-themed layout: panel sizing, terminal-style
# progress box, chat bubbles with avatars, and the logo row.  The string
# is passed to Gradio as page-level CSS.
custom_css = """
.full-height-row {
    height: calc(100vh - 150px); /* Adjusted height */
    margin: 0;
    padding: 10px;
}
.upload-box {
    background: #2a2a2a;
    border-radius: 8px;
    padding: 15px;
    margin-bottom: 15px;
    border: 1px solid #3a3a3a;
}
.status-box-container {
    background: #2a2a2a;
    border-radius: 8px;
    padding: 15px;
    height: calc(100vh - 350px); /* Reduced height */
    border: 1px solid #3a3a3a;
    margin-bottom: 15px;
}
.status-box {
    font-family: 'Courier New', monospace;
    font-size: 12px;
    line-height: 1.4;
    background-color: #1a1a1a;
    color: #00ff00;
    padding: 10px;
    border-radius: 5px;
    height: calc(100% - 40px); /* Adjust for header */
    overflow-y: auto;
    white-space: pre-wrap;
    word-wrap: break-word;
    border: none;
}
.preview-tabs {
    height: calc(100vh - 100px); /* Increased container height from 200px */
    background: #2a2a2a;
    border-radius: 8px;
    padding: 15px;
    border: 1px solid #3a3a3a;
    margin-bottom: 15px;
}
.chat-container {
    height: 100%; /* Take full height */
    display: flex;
    flex-direction: column;
    background: #2a2a2a;
    border-radius: 8px;
    padding: 15px;
    border: 1px solid #3a3a3a;
}
.chatbox {
    flex: 1; /* Take remaining space */
    overflow-y: auto;
    background: #1a1a1a;
    border-radius: 8px;
    padding: 15px;
    margin-bottom: 15px;
    color: #ffffff;
    min-height: 200px; /* Ensure minimum height */
}
.chat-input-group {
    height: auto; /* Allow natural height */
    min-height: 120px; /* Minimum height for input area */
    background: #1a1a1a;
    border-radius: 8px;
    padding: 15px;
    margin-top: auto; /* Push to bottom */
}
.chat-input {
    background: #2a2a2a;
    color: #ffffff;
    border: 1px solid #3a3a3a;
    border-radius: 5px;
    padding: 12px;
    min-height: 80px;
    width: 100%;
    margin-bottom: 10px;
}
.send-button {
    width: 100%;
    background: #4a4a4a;
    color: #ffffff;
    border-radius: 5px;
    border: none;
    padding: 12px;
    cursor: pointer;
    transition: background-color 0.3s;
}
.result-image {
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    margin: 10px 0;
    background: #ffffff;
}
.chat-message {
    display: flex;
    margin-bottom: 1rem;
    align-items: flex-start;
}
.chat-message .avatar {
    width: 40px;
    height: 40px;
    margin-right: 10px;
    border-radius: 50%;
}
.chat-message .speech-bubble {
    background: #2a2a2a;
    padding: 10px 15px;
    border-radius: 10px;
    max-width: 80%;
    margin-bottom: 5px;
}
.chat-message .timestamp {
    font-size: 0.8em;
    color: #666;
}
.logo-row {
    width: 100%;
    background-color: #1a1a1a;
    padding: 10px 0;
    margin: 0;
    border-bottom: 1px solid #3a3a3a;
}
"""
579
+
580
+
581
def create_ui():
    """Build and return the Gradio Blocks app for the P&ID analyzer.

    Layout: a logo row, then three columns — upload/progress (left),
    tabbed result previews (center), and the chat panel (right).  Event
    wiring connects the process button to process_pnid() and the chat
    input/button to the local handle_chat() closure.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    logo_path = os.path.join(current_dir, "assets", "intuigence.png")

    # Page-level CSS: orange accent theme plus logo/layout tweaks.
    css = """
    /* Theme colors */
    :root {
        --orange-primary: #ff6b2b;
        --orange-hover: #ff8651;
        --orange-light: rgba(255, 107, 43, 0.1);
    }

    /* Logo styling */
    .logo-container {
        padding: 10px 20px;
        margin-bottom: 10px;
        text-align: left;
        width: 100%;
        background: #1a1a1a; /* Match app background */
        border-bottom: 1px solid #3a3a3a;
    }
    .logo-container img {
        max-height: 40px;
        width: auto;
        display: inline-block !important;
    }
    /* Hide download and fullscreen buttons for logo */
    .logo-container .download-button,
    .logo-container .fullscreen-button {
        display: none !important;
    }
    /* Adjust main content padding */
    .main-content {
        padding-top: 10px;
    }
    /* Custom orange theme */
    .primary-button {
        background: var(--orange-primary) !important;
        color: white !important;
        border: none !important;
    }
    .primary-button:hover {
        background: var(--orange-hover) !important;
    }

    /* Tab styling */
    .tabs > .tab-nav > button.selected {
        border-color: var(--orange-primary) !important;
        color: var(--orange-primary) !important;
    }
    .tabs > .tab-nav > button:hover {
        border-color: var(--orange-hover) !important;
        color: var(--orange-hover) !important;
    }

    /* File upload button */
    .file-upload {
        background: var(--orange-primary) !important;
    }

    /* Progress bar */
    .progress-bar > div {
        background: var(--orange-primary) !important;
    }

    /* Tags and labels */
    .label-wrap {
        background: var(--orange-primary) !important;
    }

    /* Selected/active states */
    .selected, .active, .focused {
        border-color: var(--orange-primary) !important;
        color: var(--orange-primary) !important;
    }

    /* Links and interactive elements */
    a, .link, .interactive {
        color: var(--orange-primary) !important;
    }
    a:hover, .link:hover, .interactive:hover {
        color: var(--orange-hover) !important;
    }

    /* Input focus states */
    input:focus, textarea:focus {
        border-color: var(--orange-primary) !important;
        box-shadow: 0 0 0 1px var(--orange-light) !important;
    }

    /* Checkbox and radio */
    input[type="checkbox"]:checked, input[type="radio"]:checked {
        background-color: var(--orange-primary) !important;
        border-color: var(--orange-primary) !important;
    }
    """

    with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
        # Logo row (before main content)
        with gr.Row(elem_classes="logo-container"):
            gr.Image(
                value=logo_path,
                show_label=False,
                container=False,
                interactive=False,
                show_download_button=False,
                show_share_button=False,
                height=40
            )

        # Session state.  NOTE(review): file_path is declared but never
        # wired to any event — presumably a leftover; confirm and remove.
        file_path = gr.State()
        json_path = gr.State()  # path of the aggregated JSON, set by process_pnid

        # Main content row
        with gr.Row(elem_classes="main-content"):
            # Left column - File Upload & Processing
            with gr.Column(scale=3, elem_classes="column-panel"):
                file_output = gr.File(label="Upload P&ID Document")
                process_button = gr.Button(
                    "Process Document",
                    elem_classes="primary-button"  # orange accent styling
                )
                progress_output = gr.Textbox(
                    label="Progress",
                    value="Waiting for document...",
                    interactive=False
                )

            # Center column - Preview Panel, one tab per pipeline stage
            with gr.Column(scale=5, elem_classes="column-panel preview-panel"):
                with gr.Tabs() as tabs:
                    with gr.TabItem("P&ID"):
                        input_image = gr.Image(type="filepath", label="Original")
                    with gr.TabItem("Symbols"):
                        symbol_image = gr.Image(type="filepath", label="Detected Symbols")
                    with gr.TabItem("Tags"):
                        text_image = gr.Image(type="filepath", label="Detected Tags")
                    with gr.TabItem("Lines"):
                        line_image = gr.Image(type="filepath", label="Detected Lines")
                    with gr.TabItem("Aggregated"):
                        aggregated_image = gr.Image(type="filepath", label="Aggregated Results")
                    with gr.TabItem("Knowledge Graph"):
                        graph_image = gr.Image(type="filepath", label="Knowledge Graph")

            # Right column - Chat Interface
            with gr.Column(scale=4, elem_classes="column-panel chat-panel", elem_id="chat-panel"):
                chat_history = gr.Chatbot(
                    [],
                    elem_classes="chat-history",
                    height=400,
                    show_label=False,
                    type="messages",  # dict-based {"role", "content"} entries
                    elem_id="chat-history"
                )
                with gr.Row():
                    chat_input = gr.Textbox(
                        placeholder="Ask me about the P&ID...",
                        show_label=False,
                        container=False
                    )
                    chat_button = gr.Button(
                        "Send",
                        elem_classes="primary-button"  # orange accent styling
                    )

        def handle_chat(user_message, chat_history, json_path):
            # Append the user message and the assistant reply (messages
            # format); returns "" to clear the input textbox.
            if not user_message:
                return "", chat_history

            # Add user message
            chat_history = chat_history + [{"role": "user", "content": user_message}]

            try:
                # Get assistant response
                response = get_assistant_response(user_message, json_path)
                # Add assistant response
                chat_history = chat_history + [{"role": "assistant", "content": response}]
            except Exception as e:
                logger.error(f"Error in chat response: {str(e)}")
                chat_history = chat_history + [
                    {"role": "assistant", "content": "I apologize, but I encountered an error processing your request."}
                ]

            return "", chat_history

        # Connect UI elements: Enter key and Send button both submit.
        chat_input.submit(handle_chat, [chat_input, chat_history, json_path], [chat_input, chat_history])
        chat_button.click(handle_chat, [chat_input, chat_history, json_path], [chat_input, chat_history])

        # process_pnid is a generator; each yield updates all 9 outputs.
        process_button.click(
            process_pnid,
            inputs=[file_output],
            outputs=[
                progress_output,  # Progress text (0)
                input_image,  # P&ID (1)
                symbol_image,  # Symbols (2)
                text_image,  # Tags (3)
                line_image,  # Lines (4)
                aggregated_image,  # Aggregated (5)
                graph_image,  # Graph (6)
                chat_history,  # Chat (7)
                json_path  # State (8)
            ],
            show_progress="hidden"  # Hide progress in tabs
        )

    return demo
789
+
790
+
791
def main():
    """Entry point: make sure model weights are present, then launch the app."""
    # Copy/download model weights only on first run.
    yolo_weights = 'models/yolo/yolov8n.pt'
    if not os.path.exists(yolo_weights):
        copy_local_models()

    app = create_ui()
    # Local development settings (no HF Spaces special-casing).
    app.launch(
        server_name="0.0.0.0",
        server_port=7861,  # 7860 left free for other local Gradio apps
        share=True
    )
803
+
804
+
805
if __name__ == "__main__":
    # Launch the Gradio app when run as a script.
    main()
graph_construction.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import networkx as nx
4
+ import matplotlib.pyplot as plt
5
+ from pathlib import Path
6
+ import logging
7
+ import traceback
8
+ from storage import StorageFactory
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def construct_graph_network(data: dict, validation_results_path: str, results_dir: str, storage=None):
    """Construct and render a network graph from aggregated detection data.

    Args:
        data: Aggregated detection dict with 'nodes' and 'edges' lists;
            node positions come from 'coords' (connection points) or the
            bbox center (symbols/texts).
        validation_results_path: NOTE(review): accepted but never used in
            this function — confirm whether it can be dropped.
        results_dir: Directory where the PNG and graph JSON are written.
        storage: Optional storage backend; a default is created when None.

    Returns:
        (G, pos, figure) on success, or (None, None, None) on any error
        (the error is logged, not raised).
    """
    try:
        # Use provided storage or get a new one
        if storage is None:
            storage = StorageFactory.get_storage()

        # Create graph
        G = nx.Graph()
        pos = {}  # node id -> (x, y) layout positions

        # Add nodes from the aggregated data
        for node in data.get('nodes', []):
            node_id = node['id']
            node_type = node['type']

            # Position: connection points carry explicit coords; symbols
            # and texts use their bounding-box center.
            if node_type == 'connection_point':
                pos[node_id] = (node['coords']['x'], node['coords']['y'])
            else:  # symbol or text
                bbox = node['bbox']
                pos[node_id] = (
                    (bbox['xmin'] + bbox['xmax']) / 2,
                    (bbox['ymin'] + bbox['ymax']) / 2
                )

            # Add node with all its properties
            G.add_node(node_id, **node)

        # Add edges from the aggregated data
        for edge in data.get('edges', []):
            G.add_edge(
                edge['source'],
                edge['target'],
                **edge.get('properties', {})
            )

        # Create visualization
        plt.figure(figsize=(20, 20))

        # Draw nodes with different colors based on type
        node_colors = []
        for node in G.nodes():
            node_type = G.nodes[node]['type']
            if node_type == 'symbol':
                node_colors.append('lightblue')
            elif node_type == 'text':
                node_colors.append('lightgreen')
            else:  # connection_point
                node_colors.append('lightgray')

        nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=500)
        nx.draw_networkx_edges(G, pos, edge_color='gray', width=1)

        # Add compact labels: S:<class>, T:<text prefix>, C:<point type>
        labels = {}
        for node in G.nodes():
            node_data = G.nodes[node]
            if node_data['type'] == 'symbol':
                labels[node] = f"S:{node_data.get('properties', {}).get('class', '')}"
            elif node_data['type'] == 'text':
                content = node_data.get('content', '')
                labels[node] = f"T:{content[:10]}..." if len(content) > 10 else f"T:{content}"
            else:
                # NOTE(review): unguarded node_data['properties'] — will
                # KeyError if a connection_point lacks 'properties'.
                labels[node] = f"C:{node_data['properties'].get('point_type', '')}"

        nx.draw_networkx_labels(G, pos, labels, font_size=8)

        plt.title("P&ID Knowledge Graph")
        plt.axis('off')

        # Save the visualization as a high-DPI PNG
        graph_image_path = os.path.join(results_dir, f"{Path(data.get('image_path', 'graph')).stem}_graph.png")
        plt.savefig(graph_image_path, bbox_inches='tight', dpi=300)
        plt.close()

        # Save graph data as JSON for future use (node-link format)
        graph_json_path = os.path.join(results_dir, f"{Path(data.get('image_path', 'graph')).stem}_graph_data.json")
        with open(graph_json_path, 'w') as f:
            json.dump(nx.node_link_data(G), f, indent=2)

        return G, pos, plt.gcf()

    except Exception as e:
        logger.error(f"Error in construct_graph_network: {str(e)}")
        traceback.print_exc()
        return None, None, None
100
+
101
+
102
if __name__ == "__main__":
    # Manual smoke test: build and show a graph from a sample
    # aggregated-results file, if one exists.
    test_data_path = "results/test_aggregated.json"
    if os.path.exists(test_data_path):
        with open(test_data_path, 'r') as f:
            test_data = json.load(f)

        G, pos, fig = construct_graph_network(
            test_data,
            "results/validation.json",
            "results"
        )
        if fig:
            plt.show()
graph_processor.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import networkx as nx
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import traceback
6
+ import uuid
7
+
8
+
9
def create_connected_graph(input_data):
    """Create a connected graph (and matplotlib figure) from aggregated data.

    Args:
        input_data: Aggregated dict that must contain 'symbols', 'texts',
            'lines', 'nodes', and 'edges'.  Symbol and text entries become
            graph nodes positioned at their bbox centers; 'edges' entries
            connect nodes already present in the graph.

    Returns:
        (G, pos, figure) on success, or (None, None, None) on any error
        (the error is printed, not raised).
    """
    try:
        # Validate input data structure
        if not isinstance(input_data, dict):
            raise ValueError("Invalid input data format")

        # Check for required keys in new format
        required_keys = ['symbols', 'texts', 'lines', 'nodes', 'edges']
        if not all(key in input_data for key in required_keys):
            raise ValueError(f"Missing required keys in input data. Expected: {required_keys}")

        # Create graph
        G = nx.Graph()

        # Track positions for layout (node id -> (x, y))
        pos = {}

        # Add symbol nodes
        for symbol in input_data['symbols']:
            # NOTE(review): default is a list but bbox is indexed with
            # string keys below — presumably bbox is always a dict with
            # xmin/xmax/ymin/ymax; confirm against the aggregator output.
            bbox = symbol.get('bbox', [])
            symbol_id = symbol.get('id', str(uuid.uuid4()))

            if bbox:
                # Calculate center position
                center_x = (bbox['xmin'] + bbox['xmax']) / 2
                center_y = (bbox['ymin'] + bbox['ymax']) / 2
                pos[symbol_id] = (center_x, center_y)

                G.add_node(
                    symbol_id,
                    type='symbol',
                    class_name=symbol.get('class', ''),
                    bbox=bbox,
                    confidence=symbol.get('confidence', 0.0)
                )

        # Add text nodes
        for text in input_data['texts']:
            bbox = text.get('bbox', [])
            text_id = text.get('id', str(uuid.uuid4()))

            if bbox:
                center_x = (bbox['xmin'] + bbox['xmax']) / 2
                center_y = (bbox['ymin'] + bbox['ymax']) / 2
                pos[text_id] = (center_x, center_y)

                G.add_node(
                    text_id,
                    type='text',
                    text=text.get('text', ''),
                    bbox=bbox,
                    confidence=text.get('confidence', 0.0)
                )

        # Add edges from the edges list; endpoints missing from the graph
        # (e.g. skipped for lacking a bbox) are silently dropped.
        for edge in input_data['edges']:
            source = edge.get('source')
            target = edge.get('target')
            if source and target and source in G and target in G:
                G.add_edge(
                    source,
                    target,
                    type=edge.get('type', 'connection'),
                    properties=edge.get('properties', {})
                )

        # Create visualization
        plt.figure(figsize=(20, 20))

        # Draw nodes with fixed positions (blue symbols, green texts)
        nx.draw_networkx_nodes(G, pos,
                               node_color=['lightblue' if G.nodes[node]['type'] == 'symbol' else 'lightgreen' for node
                                           in G.nodes()],
                               node_size=500)

        # Draw edges
        nx.draw_networkx_edges(G, pos, edge_color='gray', width=1)

        # Add compact labels: S:<class> for symbols, T:<text prefix> for texts
        labels = {}
        for node in G.nodes():
            node_data = G.nodes[node]
            if node_data['type'] == 'symbol':
                labels[node] = f"S:{node_data['class_name']}"
            else:
                text = node_data.get('text', '')
                labels[node] = f"T:{text[:10]}..." if len(text) > 10 else f"T:{text}"

        nx.draw_networkx_labels(G, pos, labels, font_size=8)

        plt.title("P&ID Network Graph")
        plt.axis('off')

        return G, pos, plt.gcf()

    except Exception as e:
        print(f"Error in create_connected_graph: {str(e)}")
        traceback.print_exc()
        return None, None, None
+
110
+
111
+ if __name__ == "__main__":
112
+ # Test code
113
+ with open('results/0_aggregated.json') as f:
114
+ data = json.load(f)
115
+
116
+ G, pos, fig = create_connected_graph(data)
117
+ if fig:
118
+ plt.show()
graph_visualization.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import networkx as nx
3
+ import matplotlib.pyplot as plt
4
+ import os
5
+ from pprint import pprint
6
+ import uuid
7
+ import argparse
8
+ from pathlib import Path
9
+ from tqdm import tqdm
10
+
11
def create_graph_visualization(json_path: str, output_dir: str, base_name: str, save_plot: bool = True) -> dict:
    """Create a graph visualization using the actual bbox coordinates from an aggregated JSON file.

    Args:
        json_path: Path to the aggregated JSON file containing 'nodes' and 'edges'.
        output_dir: Directory where the PNG visualization is written.
        base_name: Base name for the output file (a trailing '_aggregated' is stripped).
        save_plot: When True, render and save the plot; otherwise only build the graph.

    Returns:
        dict with 'success' plus, on success, 'graph' (and 'image_path'/'stats' when
        save_plot is True); on failure, 'error' holds the exception message.
    """
    try:
        # Remove '_aggregated' suffix if present
        if base_name.endswith('_aggregated'):
            base_name = base_name[:-len('_aggregated')]

        print("\nLoading JSON data...")
        with open(json_path, 'r') as f:
            data = json.load(f)

        # Create graph
        G = nx.Graph()
        pos = {}
        valid_nodes = []
        invalid_nodes = []

        # First pass - collect nodes with usable coordinates.
        # FIX: the previous truthiness check (`if node_id and x and y`) silently
        # rejected legitimate nodes sitting on an image edge (x == 0 or y == 0).
        # We now only require that both coordinates are present and numeric.
        print("\nValidating nodes...")
        for node in tqdm(data.get('nodes', []), desc="Validating"):
            try:
                node_id = str(node.get('id', ''))
                raw_x = node.get('x')
                raw_y = node.get('y')

                if node_id and raw_x is not None and raw_y is not None:
                    pos[node_id] = (float(raw_x), float(raw_y))
                    valid_nodes.append(node)
                else:
                    invalid_nodes.append(node)
            except (ValueError, TypeError):
                # Non-numeric coordinates cannot be positioned.
                invalid_nodes.append(node)
                continue

        print(f"\nFound {len(valid_nodes)} valid nodes and {len(invalid_nodes)} invalid nodes")

        # Add valid nodes
        print("\nAdding valid nodes...")
        for node in tqdm(valid_nodes, desc="Nodes"):
            node_id = str(node.get('id', ''))
            attrs = {
                'type': node.get('type', ''),
                'label': node.get('label', ''),
                'x': float(node.get('x', 0)),
                'y': float(node.get('y', 0))
            }
            G.add_node(node_id, **attrs)

        # Add valid edges (only between valid nodes)
        print("\nAdding valid edges...")
        valid_edges = []
        invalid_edges = []

        for edge in tqdm(data.get('edges', []), desc="Edges"):
            try:
                start_id = str(edge.get('start_point', ''))
                end_id = str(edge.get('end_point', ''))

                if start_id in pos and end_id in pos:  # Only add if both nodes exist
                    valid_edges.append(edge)
                    attrs = {
                        'type': edge.get('type', ''),
                        'weight': edge.get('weight', 1.0)
                    }
                    G.add_edge(start_id, end_id, **attrs)
                else:
                    invalid_edges.append(edge)
            except Exception:
                invalid_edges.append(edge)
                continue

        print(f"\nFound {len(valid_edges)} valid edges and {len(invalid_edges)} invalid edges")

        if save_plot:
            print("\nGenerating visualization...")
            plt.figure(figsize=(20, 20))

            print("Drawing graph elements...")
            with tqdm(total=3, desc="Drawing") as pbar:
                # Draw nodes
                nx.draw_networkx_nodes(G, pos,
                                       node_color='lightblue',
                                       node_size=100)
                pbar.update(1)

                # Draw edges
                nx.draw_networkx_edges(G, pos)
                pbar.update(1)

                # Save plot
                image_path = os.path.join(output_dir, f"{base_name}_graph_visualization.png")
                plt.savefig(image_path, bbox_inches='tight', dpi=300)
                plt.close()
                pbar.update(1)

            print(f"\nVisualization saved to: {image_path}")
            return {
                'success': True,
                'image_path': image_path,
                'graph': G,
                'stats': {
                    'valid_nodes': len(valid_nodes),
                    'invalid_nodes': len(invalid_nodes),
                    'valid_edges': len(valid_edges),
                    'invalid_edges': len(invalid_edges)
                }
            }

        return {
            'success': True,
            'graph': G
        }

    except Exception as e:
        print(f"\nError creating graph: {str(e)}")
        return {
            'success': False,
            'error': str(e)
        }
130
+
131
if __name__ == "__main__":
    """Test the graph visualization independently"""

    # Command-line interface for running this module standalone.
    cli = argparse.ArgumentParser(description='Create and visualize graph from aggregated JSON')
    cli.add_argument('--json_path', type=str, default="results/002_page_1_aggregated.json",
                     help='Path to aggregated JSON file')
    cli.add_argument('--output_dir', type=str, default="results",
                     help='Directory to save outputs')
    cli.add_argument('--show', action='store_true',
                     help='Show the plot interactively')
    opts = cli.parse_args()

    # Guard clause: bail out early when the input file is missing.
    if not os.path.exists(opts.json_path):
        print(f"Error: Could not find input file {opts.json_path}")
        exit(1)

    os.makedirs(opts.output_dir, exist_ok=True)

    # Derive the output base name, dropping the '_aggregated' suffix if present.
    stem = Path(opts.json_path).stem
    if stem.endswith('_aggregated'):
        stem = stem[:-len('_aggregated')]

    print(f"\nProcessing:")
    print(f"Input: {opts.json_path}")
    print(f"Output: {opts.output_dir}/{stem}_graph_visualization.png")

    try:
        outcome = create_graph_visualization(
            json_path=opts.json_path,
            output_dir=opts.output_dir,
            base_name=stem,
            save_plot=True
        )

        if outcome['success']:
            print(f"\nSuccess! Graph visualization saved to: {outcome['image_path']}")
            if opts.show:
                # NOTE(review): the figure is closed inside create_graph_visualization,
                # so plt.show() here has nothing left to display — confirm intent.
                plt.show()
        else:
            print(f"\nError: {outcome['error']}")

    except Exception as e:
        print(f"\nError during visualization: {str(e)}")
        raise
line_detection_ai.py ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from base import BaseDetector, BaseDetectionPipeline
2
+ from utils import *
3
+ from config import (
4
+ ImageConfig,
5
+ SymbolConfig,
6
+ TagConfig,
7
+ LineConfig,
8
+ PointConfig,
9
+ JunctionConfig
10
+ )
11
+ from detectors import (
12
+ LineDetector,
13
+ PointDetector,
14
+ JunctionDetector,
15
+ SymbolDetector,
16
+ TagDetector
17
+ )
18
+ from pathlib import Path
19
+ from storage import StorageFactory
20
+ from common import DetectionResult
21
+ from detection_schema import DetectionContext, JunctionType
22
+ from typing import List, Tuple, Optional, Dict
23
+ import torch
24
+ import numpy as np
25
+ import cv2
26
+ import os
27
+ import logging
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
class DiagramDetectionPipeline:
    """
    Pipeline that runs multiple detectors (line, point, junction, etc.) on an image,
    and keeps a shared DetectionContext in memory.

    Detectors are injected via the constructor; point and junction detectors may be
    None (their stages are skipped in run()). Results are persisted through the
    injected StorageInterface.
    """

    def __init__(self,
                 tag_detector: Optional[BaseDetector],
                 symbol_detector: Optional[BaseDetector],
                 line_detector: Optional[BaseDetector],
                 point_detector: Optional[BaseDetector],
                 junction_detector: Optional[BaseDetector],
                 storage: StorageInterface,
                 debug_handler: Optional[DebugHandler] = None,
                 transformer: Optional[CoordinateTransformer] = None):
        """
        You can pass None for detectors you don't need.

        NOTE(review): run() calls self.debug_handler.track_performance(...) and the
        symbol/tag/line detectors unconditionally, so despite the Optional
        annotations those are effectively required — confirm before passing None.
        """
        # super().__init__(storage=storage, debug_handler=debug_handler)
        self.storage = storage
        self.debug_handler = debug_handler
        self.tag_detector = tag_detector
        self.symbol_detector = symbol_detector
        self.line_detector = line_detector
        self.point_detector = point_detector
        self.junction_detector = junction_detector
        # Fall back to a default transformer when none is supplied.
        self.transformer = transformer or CoordinateTransformer()

    def _load_image(self, image_path: str) -> np.ndarray:
        """Load image with validation.

        Bytes come from the storage backend and are decoded with OpenCV,
        so the result is a BGR ndarray.

        Raises:
            ValueError: if the bytes cannot be decoded into an image.
        """
        image_data = self.storage.load_file(image_path)
        image = cv2.imdecode(np.frombuffer(image_data, np.uint8), cv2.IMREAD_COLOR)
        if image is None:
            raise ValueError(f"Failed to load image from {image_path}")
        return image

    def _crop_to_roi(self, image: np.ndarray, roi: Optional[list]) -> Tuple[np.ndarray, Tuple[int, int]]:
        """Crop to ROI if provided, else return full image.

        Returns the (possibly cropped) image together with the (x_min, y_min)
        offset of the crop so detections can be mapped back to full-image
        coordinates. Currently unused by run() (the crop call is commented out).
        """
        if roi is not None and len(roi) == 4:
            x_min, y_min, x_max, y_max = roi
            return image[y_min:y_max, x_min:x_max], (x_min, y_min)
        return image, (0, 0)

    def _remove_symbol_tag_bboxes(self, image: np.ndarray, context: DetectionContext) -> np.ndarray:
        """Fill symbol & tag bounding boxes with white to avoid line detection picking them up."""
        masked = image.copy()
        # Paint every detected symbol box solid white (thickness=-1 fills).
        for sym in context.symbols.values():
            cv2.rectangle(masked,
                          (sym.bbox.xmin, sym.bbox.ymin),
                          (sym.bbox.xmax, sym.bbox.ymax),
                          (255, 255, 255),  # White
                          thickness=-1)

        # Same for detected text tags.
        for tg in context.tags.values():
            cv2.rectangle(masked,
                          (tg.bbox.xmin, tg.bbox.ymin),
                          (tg.bbox.xmax, tg.bbox.ymax),
                          (255, 255, 255),
                          thickness=-1)
        return masked

    def run(
        self,
        image_path: str,
        output_dir: str,
        config
    ) -> DetectionResult:
        """
        Main pipeline steps (in local coords):
          1) Load + crop image
          2) Detect symbols & tags
          3) Make a copy for final debug images
          4) White out symbol/tag bounding boxes
          5) Detect lines, points, junctions
          6) Save final JSON
          7) Generate debug images with various combinations

        Returns:
            DetectionResult with success flag, timing and output paths; on any
            exception, a failed DetectionResult carrying the error message.
        """
        try:
            with self.debug_handler.track_performance("total_processing"):
                # 1) Load & crop
                image = self._load_image(image_path)
                # cropped_image, roi_offset = self._crop_to_roi(image, config.roi)

                # 2) Create fresh context
                context = DetectionContext()

                # 3) Detect symbols (detectors write their results into `context`)
                with self.debug_handler.track_performance("symbol_detection"):
                    self.symbol_detector.detect(
                        image,
                        context=context,
                    )

                # 4) Detect tags
                with self.debug_handler.track_performance("tag_detection"):
                    self.tag_detector.detect(
                        image,
                        context=context,
                    )

                # Make a copy of the cropped image for final debug combos
                debug_cropped = image.copy()

                # 5) White-out symbol/tag bboxes in the original cropped image
                # so the line detector doesn't trace symbol outlines or text.
                cropped_image = self._remove_symbol_tag_bboxes(image, context)

                # 6) Detect lines
                with self.debug_handler.track_performance("line_detection"):
                    self.line_detector.detect(cropped_image, context=context)

                # 7) Detect points (optional stage)
                if self.point_detector:
                    with self.debug_handler.track_performance("point_detection"):
                        self.point_detector.detect(cropped_image, context=context)

                # 8) Detect junctions (optional stage)
                if self.junction_detector:
                    with self.debug_handler.track_performance("junction_detection"):
                        self.junction_detector.detect(cropped_image, context=context)

                # 9) Save final JSON & any final images
                output_paths = self._persist_results(output_dir, image_path, context)

                # 10) Save debug images in local coords using debug_cropped
                self._save_all_combinations(debug_cropped, context, output_dir, image_path)

                return DetectionResult(
                    success=True,
                    processing_time=self.debug_handler.metrics.get('total_processing', 0),
                    json_path=output_paths.get('json_path'),
                    image_path=output_paths.get('image_path')  # Now returning the annotated image path
                )

        except Exception as e:
            logger.error(f"Processing failed: {str(e)}")
            return DetectionResult(
                success=False,
                error=str(e)
            )

    # ------------------------------------------------
    # HELPER FUNCTIONS
    # ------------------------------------------------
    def _persist_results(self, output_dir: str, image_path: str, context: DetectionContext) -> dict:
        """Saves only JSON and line detection visualization.

        Returns:
            dict with 'json_path' and 'image_path' of the saved artifacts.
        """
        base_name = Path(image_path).stem
        # Strip the UI's 'display_' prefix so outputs use the original name.
        if base_name.startswith('display_'):
            base_name = base_name[8:]

        # Save JSON
        json_path = Path(output_dir) / f"{base_name}_detected_lines.json"
        context_json_str = context.to_json(indent=2)
        self.storage.save_file(str(json_path), context_json_str.encode('utf-8'))

        # Save line detection visualization using input image
        annotated = self._draw_objects(
            self._load_image(image_path),  # Use input image instead of output
            context,
            draw_lines=True,
            draw_points=False,
            draw_symbols=False,
            draw_junctions=False,
            draw_tags=False
        )

        # Save visualization (note: this rebinds the `image_path` parameter)
        image_path = Path(output_dir) / f"{base_name}_detected_lines.png"
        _, encoded = cv2.imencode('.png', annotated)
        self.storage.save_file(str(image_path), encoded.tobytes())

        return {
            "json_path": str(json_path),
            "image_path": str(image_path)
        }

    def _save_all_combinations(self, local_image: np.ndarray, context: DetectionContext,
                               output_dir: str, image_path: str) -> None:
        """Only save line detection visualization.

        NOTE(review): this writes the same "{base_name}_detected_lines.png" path
        as _persist_results, overwriting it with an identical rendering — confirm
        whether both calls are needed.
        """
        base_name = Path(image_path).stem
        if base_name.startswith('display_'):
            base_name = base_name[8:]

        # Only save line detection visualization
        annotated = self._draw_objects(local_image, context,
                                       draw_symbols=False,
                                       draw_tags=False,
                                       draw_lines=True,
                                       draw_points=False,
                                       draw_junctions=False)

        save_name = f"{base_name}_detected_lines.png"
        save_path = Path(output_dir) / save_name
        _, encoded = cv2.imencode('.png', annotated)
        self.storage.save_file(str(save_path), encoded.tobytes())

    def _draw_objects(self, base_image: np.ndarray, context: DetectionContext,
                      draw_lines: bool = True, draw_points: bool = True,
                      draw_symbols: bool = True, draw_junctions: bool = True,
                      draw_tags: bool = True) -> np.ndarray:
        """Draw detection results on a copy of base_image in local coords.

        Each flag toggles one overlay layer; colors are BGR (OpenCV order).
        """
        annotated = base_image.copy()

        # Lines
        if draw_lines:
            for ln in context.lines.values():
                cv2.line(annotated,
                         (ln.start.coords.x, ln.start.coords.y),
                         (ln.end.coords.x, ln.end.coords.y),
                         (0, 255, 0),  # green
                         2)

        # Points
        if draw_points:
            for pt in context.points.values():
                cv2.circle(annotated,
                           (pt.coords.x, pt.coords.y),
                           3,
                           (0, 0, 255),  # red
                           -1)

        # Symbols: bounding box plus a filled dot at the symbol center
        if draw_symbols:
            for sym in context.symbols.values():
                cv2.rectangle(annotated,
                              (sym.bbox.xmin, sym.bbox.ymin),
                              (sym.bbox.xmax, sym.bbox.ymax),
                              (255, 255, 0),  # cyan
                              2)
                cv2.circle(annotated,
                           (sym.center.x, sym.center.y),
                           4,
                           (255, 0, 255),  # magenta
                           -1)

        # Junctions: color encodes junction type (T / L / END)
        if draw_junctions:
            for jn in context.junctions.values():
                if jn.junction_type == JunctionType.T:
                    color = (0, 165, 255)  # orange
                elif jn.junction_type == JunctionType.L:
                    color = (255, 0, 255)  # magenta
                else:  # END
                    color = (0, 0, 255)  # red
                cv2.circle(annotated,
                           (jn.center.x, jn.center.y),
                           5,
                           color,
                           -1)

        # Tags: bounding box with recognized text above it
        if draw_tags:
            for tg in context.tags.values():
                cv2.rectangle(annotated,
                              (tg.bbox.xmin, tg.bbox.ymin),
                              (tg.bbox.xmax, tg.bbox.ymax),
                              (128, 0, 128),  # purple
                              2)
                cv2.putText(annotated,
                            tg.text,
                            (tg.bbox.xmin, tg.bbox.ymin - 5),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.5,
                            (128, 0, 128),
                            1)

        return annotated

    def detect_lines(self, image_path: str, output_dir: str, config: Optional[Dict] = None) -> Dict:
        """Legacy interface for line detection.

        Builds a fresh, line-only pipeline (CPU DeepLSD) and runs it; the
        detectors configured on `self` are intentionally not used here.
        """
        storage = StorageFactory.get_storage()
        debug_handler = DebugHandler(enabled=True, storage=storage)

        line_detector = LineDetector(
            config=LineConfig(),
            model_path="models/deeplsd_md.tar",
            device=torch.device("cpu"),
            debug_handler=debug_handler
        )

        pipeline = DiagramDetectionPipeline(
            tag_detector=None,
            symbol_detector=None,
            line_detector=line_detector,
            point_detector=None,
            junction_detector=None,
            storage=storage,
            debug_handler=debug_handler
        )

        result = pipeline.run(image_path, output_dir, ImageConfig())
        return result

    def _validate_and_normalize_coordinates(self, points):
        """Validate and normalize coordinates to image space.

        Keeps only points lying inside [0, image_width] x [0, image_height],
        coercing coordinates to int and defaulting 'type'/'confidence'.

        NOTE(review): self.image_width / self.image_height are never assigned in
        this class — this method appears to rely on state set elsewhere; confirm
        before calling it.
        """
        valid_points = []
        for point in points:
            x, y = point['x'], point['y']
            # Validate coordinates are within image bounds
            if 0 <= x <= self.image_width and 0 <= y <= self.image_height:
                # Normalize coordinates if needed
                valid_points.append({
                    'x': int(x),
                    'y': int(y),
                    'type': point.get('type', 'unknown'),
                    'confidence': point.get('confidence', 1.0)
                })
        return valid_points
339
+
340
+
341
if __name__ == "__main__":
    # Manual end-to-end run of the full detection pipeline on a sample image.

    # 1) Shared infrastructure: storage backend + debug/metrics handler.
    backend = StorageFactory.get_storage()
    dbg = DebugHandler(enabled=True, storage=backend)

    # 2) DeepLSD model configuration for the line detector.
    deeplsd_conf = {
        "detect_lines": True,
        "line_detection_params": {
            "merge": True,
            "filtering": True,
            "grad_thresh": 3,
            "grad_nfa": True
        }
    }

    # 3) Per-detector configuration objects (all defaults).
    line_cfg = LineConfig()
    point_cfg = PointConfig()
    junction_cfg = JunctionConfig()
    symbol_cfg = SymbolConfig()
    tag_cfg = TagConfig()

    # ========================== Detectors ========================== #
    symbol_det = SymbolDetector(
        config=symbol_cfg,
        debug_handler=dbg
    )

    tag_det = TagDetector(
        config=tag_cfg,
        debug_handler=dbg
    )

    line_det = LineDetector(
        config=line_cfg,
        model_path="models/deeplsd_md.tar",
        model_config=deeplsd_conf,
        device=torch.device("cuda"),  # switch to "cpu" when no GPU is available
        debug_handler=dbg
    )

    point_det = PointDetector(
        config=point_cfg,
        debug_handler=dbg)

    junction_det = JunctionDetector(
        config=junction_cfg,
        debug_handler=dbg
    )

    # 4) Assemble the pipeline from the detectors above.
    pipeline = DiagramDetectionPipeline(
        tag_detector=tag_det,
        symbol_detector=symbol_det,
        line_detector=line_det,
        point_detector=point_det,
        junction_detector=junction_det,
        storage=backend,
        debug_handler=dbg
    )

    # 5) Execute on the sample drawing and report the outcome.
    result = pipeline.run(
        image_path="samples/images/0.jpg",
        output_dir="results/",
        config=ImageConfig()
    )

    if result.success:
        logger.info(f"Pipeline succeeded! See JSON at {result.json_path}")
    else:
        logger.error(f"Pipeline failed: {result.error}")
line_detectors.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ from typing import Dict, List, Optional, Tuple
5
+ from loguru import logger
6
+
7
# Check if DeepLSD is available.
# DeepLSD is an optional dependency: when the import fails, the module-level
# flag below lets DeepLSDDetector refuse construction gracefully, and callers
# can fall back to the OpenCV Hough-based detector instead.
try:
    from deeplsd.models.deeplsd_inference import DeepLSD
    DEEPLSD_AVAILABLE = True
except ImportError:
    DEEPLSD_AVAILABLE = False
    logger.warning("DeepLSD not available, falling back to OpenCV")
14
+
15
+
16
class OpenCVLineDetector:
    """Fallback line detector based on OpenCV's probabilistic Hough transform."""

    def __init__(self):
        # Hough transform tuning knobs (pixel units).
        self.params = {
            'threshold': 50,
            'minLineLength': 50,
            'maxLineGap': 10
        }

    def detect(self, image: np.ndarray) -> Dict:
        """Detect straight line segments with Canny + HoughLinesP.

        Accepts a BGR or grayscale image; returns {'lines': [...]} where each
        entry has float endpoints and a constant confidence of 1.0.
        """
        # Collapse to grayscale only when a 3-channel image was supplied.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image

        edges = cv2.Canny(gray, 50, 150, apertureSize=3)
        segments = cv2.HoughLinesP(
            edges, 1, np.pi / 180,
            threshold=self.params['threshold'],
            minLineLength=self.params['minLineLength'],
            maxLineGap=self.params['maxLineGap']
        )

        if segments is None:
            return {'lines': []}

        # HoughLinesP yields shape (N, 1, 4): unpack each [x1, y1, x2, y2].
        return {
            'lines': [
                {
                    'x1': float(seg[0][0]),
                    'y1': float(seg[0][1]),
                    'x2': float(seg[0][2]),
                    'y2': float(seg[0][3]),
                    'confidence': 1.0
                }
                for seg in segments
            ]
        }
54
+
55
+
56
class DeepLSDDetector:
    """Line detector backed by the DeepLSD deep line-segment model.

    Requires the optional `deeplsd` package (see the DEEPLSD_AVAILABLE guard
    at module top); the constructor raises ImportError when it is missing.
    """

    def __init__(self, model_path: str):
        # Refuse construction when the optional dependency is absent.
        if not DEEPLSD_AVAILABLE:
            raise ImportError("DeepLSD is not available")

        # Prefer GPU when present; inference falls back to CPU otherwise.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = self._load_model(model_path)

    def _load_model(self, model_path: str) -> DeepLSD:
        """Load DeepLSD weights from a checkpoint and return the model in eval mode.

        Raises:
            Exception: re-raised after logging when the checkpoint cannot be loaded.
        """
        try:
            ckpt = torch.load(model_path, map_location=self.device)
            # NOTE(review): DeepLSD() is constructed without a config argument —
            # confirm the installed DeepLSD version supports that.
            model = DeepLSD()
            model.load_state_dict(ckpt['model'])
            return model.to(self.device).eval()
        except Exception as e:
            logger.error(f"Failed to load DeepLSD model: {str(e)}")
            raise

    def detect(self, image: np.ndarray) -> Dict:
        """Detect line segments; returns {'lines': [...]} (empty list on failure)."""
        try:
            # Convert to tensor: the model expects a single grayscale channel.
            if len(image.shape) == 3:
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            else:
                gray = image

            # Shape [1, 1, H, W], float32, normalized to [0, 1].
            tensor = torch.tensor(gray, dtype=torch.float32, device=self.device)[None, None] / 255.0

            # Run inference
            with torch.no_grad():
                output = self.model({"image": tensor})
                lines = output["lines"][0]  # [N, 2, 2] array

            # Convert to standard format
            detections = []
            for line in lines:
                (x1, y1), (x2, y2) = line
                detections.append({
                    'x1': float(x1),
                    'y1': float(y1),
                    'x2': float(x2),
                    'y2': float(y2),
                    # NOTE(review): element [0] is used for every segment, so all
                    # detections share one confidence value — verify whether a
                    # per-line index was intended for the DeepLSD output schema.
                    'confidence': float(output.get("confidence", [1.0])[0])
                })

            return {'lines': detections}

        except Exception as e:
            logger.error(f"Error in DeepLSD detection: {str(e)}")
            return {'lines': []}
logger.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from loguru import logger
3
+ import sys
4
+
5
def get_logger(name: str):
    """Configure the global loguru logger (once) and return a bound instance.

    The first call installs two sinks:
      * stderr at INFO with a colored, structured format
      * logs/app.log at DEBUG with 500 MB rotation, 10-day retention and
        zip compression

    Subsequent calls skip reconfiguration. Fix: the original implementation
    tore down and rebuilt the sinks on every call, which reset logging for
    every module that had already obtained a logger.

    Args:
        name: Logical module name attached to every record via bind().

    Returns:
        A loguru logger bound with {'name': name}.
    """
    if not getattr(get_logger, "_configured", False):
        # Remove any existing handlers (first call only)
        logger.remove()

        # Add a new handler with custom format
        logger.add(
            sys.stderr,
            format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
            level="INFO"
        )

        # Add file handler for persistent logging
        logger.add(
            "logs/app.log",
            rotation="500 MB",
            retention="10 days",
            level="DEBUG",
            compression="zip"
        )

        get_logger._configured = True

    # Create logger for the module
    return logger.bind(name=name)
pdf_processor.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fitz # PyMuPDF
3
+ import cv2
4
+ import numpy as np
5
+ from pathlib import Path
6
+ import logging
7
+ from storage import StorageInterface
8
+ import shutil
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
class DocumentProcessor:
    """Convert input documents (PDF / PNG / JPG) into 600-DPI PNG page images.

    Pages are written through the injected StorageInterface; the output
    directory is wiped before each run.
    """

    def __init__(self, storage: StorageInterface):
        self.storage = storage
        self.target_dpi = 600  # Fixed at 600 DPI

    def clean_results_folder(self, output_dir: str):
        """Clean the results directory before processing new files.

        Removes the directory tree (logging and re-raising on failure) and
        recreates it empty.
        """
        if os.path.exists(output_dir):
            try:
                shutil.rmtree(output_dir)
                logger.info(f"Cleaned results directory: {output_dir}")
            except Exception as e:
                logger.error(f"Error cleaning results directory: {str(e)}")
                raise
        os.makedirs(output_dir, exist_ok=True)

    def process_document(self, file_path: str, output_dir: str) -> list:
        """Process a document (PDF/PNG/JPG) and return paths to processed pages.

        Raises:
            ValueError: for unsupported file extensions.
        """
        # Clean results folder first
        self.clean_results_folder(output_dir)

        file_ext = Path(file_path).suffix.lower()

        if file_ext == '.pdf':
            return self._process_pdf(file_path, output_dir)
        elif file_ext in ['.png', '.jpg', '.jpeg']:
            return self._process_image(file_path, output_dir)
        else:
            raise ValueError(f"Unsupported file format: {file_ext}")

    def _process_pdf(self, pdf_path: str, output_dir: str) -> list:
        """Rasterize every PDF page at target_dpi and save each as a PNG."""
        processed_pages = []
        base_name = Path(pdf_path).stem
        doc = None

        try:
            # Open PDF
            doc = fitz.open(pdf_path)

            for page_num in range(len(doc)):
                page = doc[page_num]

                # Render at target DPI (PDF user space is 72 DPI, hence the scale).
                pix = page.get_pixmap(matrix=fitz.Matrix(self.target_dpi / 72, self.target_dpi / 72))

                # Convert the raw pixmap buffer to an RGB numpy array.
                img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
                if pix.n == 4:  # RGBA -> RGB
                    img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)

                # Save image
                output_path = os.path.join(output_dir, f"{base_name}_page_{page_num + 1}.png")
                self._save_image(img, output_path)
                processed_pages.append(output_path)

            return processed_pages

        except Exception as e:
            logger.error(f"Error processing PDF: {str(e)}")
            raise
        finally:
            # Fix: always release the PyMuPDF document handle; the original
            # implementation leaked it on both success and failure paths.
            if doc is not None:
                doc.close()

    def _process_image(self, image_path: str, output_dir: str) -> list:
        """Upscale a single raster image to the 600-DPI target and save as PNG."""
        try:
            # Read image
            img = cv2.imread(image_path)
            if img is None:
                raise ValueError(f"Could not read image: {image_path}")

            # OpenCV loads BGR; the rest of the pipeline works in RGB.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # assumes the source image is 72 DPI (standard screen) — TODO confirm
            current_dpi = 72
            scale = self.target_dpi / current_dpi

            # Resize image
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

            # Save image
            base_name = Path(image_path).stem
            output_path = os.path.join(output_dir, f"{base_name}_page_1.png")
            self._save_image(img, output_path)

            return [output_path]

        except Exception as e:
            logger.error(f"Error processing image: {str(e)}")
            raise

    def _save_image(self, img: np.ndarray, output_path: str):
        """Encode an RGB image as PNG and persist it via the storage backend."""
        _, buffer = cv2.imencode('.png', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
        self.storage.save_file(output_path, buffer.tobytes())
107
+
108
if __name__ == "__main__":
    from storage import StorageFactory

    # Manual test: rasterize a sample PDF into the results folder.
    backend = StorageFactory.get_storage()
    doc_processor = DocumentProcessor(backend)

    sample_pdf = "samples/001.pdf"
    results_dir = "results"  # Changed from "processed_pages" to "results"

    try:
        # Ensure output directory exists
        os.makedirs(results_dir, exist_ok=True)

        pages = doc_processor.process_document(
            file_path=sample_pdf,
            output_dir=results_dir
        )

        # Report what was produced and how large each page is.
        print("\nProcessing Results:")
        print(f"Output Directory: {os.path.abspath(results_dir)}")

        for page_path in pages:
            abs_path = os.path.abspath(page_path)  # kept for parity with original; currently unused
            size_mb = os.path.getsize(page_path) / (1024 * 1024)
            print(f"- {os.path.basename(page_path)} ({size_mb:.2f} MB)")

        # Total size of everything now in the output directory.
        bytes_total = sum(os.path.getsize(os.path.join(results_dir, name))
                          for name in os.listdir(results_dir))
        total_size = bytes_total / (1024 * 1024)
        print(f"\nTotal output size: {total_size:.2f} MB")

    except Exception as e:
        logger.error(f"Error processing PDF: {str(e)}")
        raise
requirements.txt ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ # gradio is pinned at the bottom of this file (gradio==5.15.0); listing a
+ # second 'gradio>=3.50.2' spec here makes pip fail with "Double requirement given"
3
+ numpy>=1.24.0
4
+ Pillow>=10.0.0
5
+ opencv-python==4.8.1.78
6
+ PyMuPDF==1.23.8 # for PDF processing
7
+
8
+ # OCR and Text Detection
9
+ python-doctr==0.11.0 # Latest stable version
10
+ easyocr==1.7.1
11
+ pytesseract==0.3.10
12
+
13
+ # Deep Learning
14
+ torch>=2.0.0
15
+ torchvision>=0.15.0
16
+ tensorflow==2.11.0 # Optional with torch
17
+
18
+ # Graph Processing
19
+ networkx>=3.0
20
+ matplotlib>=3.7.0
21
+
22
+ # Utilities
23
+ python-dotenv>=1.0.0
24
+ tqdm==4.66.1
25
+ loguru==0.7.2
26
+ scipy==1.11.4
27
+ pypdfium2==4.20.0
28
+ weasyprint==60.1
29
+
30
+ # Storage and Processing
31
+ azure-storage-blob==12.19.0
32
+ azure-core==1.29.5
33
+
34
+ # OCR Engines
35
+ ultralytics==8.0.0 # for YOLO models
36
+ deeplsd @ git+https://github.com/cvg/DeepLSD.git
37
+ omegaconf>=2.3.0 # Required by DeepLSD
38
+ pytlsd @ git+https://github.com/iago-suarez/pytlsd.git # Required by DeepLSD
39
+
40
+ # AI/Chat
41
+ openai>=1.0.0 # For ChatGPT integration
42
+ uuid>=1.30
43
+ shapely>=1.8.0 # for geometry operations
44
+
45
+ # Added from the code block
46
+ requests>=2.31.0
47
+
48
+ # Added from the code block
49
+ opencv-python-headless>=4.8.0
50
+
51
+ # Added from the code block
52
+ huggingface-hub>=0.19.0
53
+ transformers>=4.35.0
54
+ gradio==5.15.0
results/002_page_1_aggregated.json ADDED
The diff for this file is too large to render. See raw diff
 
results/002_page_1_detected_symbols.json ADDED
The diff for this file is too large to render. See raw diff
 
storage.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from azure.storage.blob import BlobServiceClient
4
+ from abc import ABC, abstractmethod
5
+ import json
6
+
7
class StorageInterface(ABC):
    """Abstract file-storage backend (local disk, Azure Blob Storage, ...).

    Paths are plain strings; content is always raw ``bytes``.  Concrete
    backends must implement every abstract method below.
    """

    @abstractmethod
    def save_file(self, file_path: str, content: bytes) -> str:
        """Write *content* to *file_path* (overwriting) and return the path."""

    @abstractmethod
    def load_file(self, file_path: str) -> bytes:
        """Return the full contents of *file_path*."""

    @abstractmethod
    def list_files(self, directory: str) -> list[str]:
        """Return the files under *directory*."""

    @abstractmethod
    def file_exists(self, file_path: str) -> bool:
        """Return True when *file_path* exists in the backend."""

    @abstractmethod
    def delete_file(self, file_path: str) -> None:
        """Remove *file_path* from the backend."""

    @abstractmethod
    def create_directory(self, directory: str) -> None:
        """Create *directory* (no-op where directories are not a real concept)."""

    @abstractmethod
    def delete_directory(self, directory: str) -> None:
        """Recursively remove *directory* and everything under it."""

    @abstractmethod
    def upload(self, local_path: str, destination_path: str) -> None:
        """Copy an existing local file into the backend at *destination_path*."""

    @abstractmethod
    def append_file(self, file_path: str, content: bytes) -> None:
        """Append *content* to *file_path*, creating the file if needed."""

    @abstractmethod
    def get_modified_time(self, file_path: str) -> float:
        """Return the last-modified time of *file_path* as a UNIX timestamp."""

    @abstractmethod
    def directory_exists(self, directory: str) -> bool:
        """Return True when *directory* exists (or has content) in the backend."""

    def load_json(self, file_path):
        """Load and parse JSON file.

        BUGFIX: reads through self.load_file() instead of open(), so blob-backed
        storages can serve files that only exist remotely; the previous direct
        open() bypassed the backend entirely.  Returns None on any error.
        """
        try:
            return json.loads(self.load_file(file_path).decode('utf-8'))
        except Exception as e:
            print(f"Error loading JSON from {file_path}: {str(e)}")
            return None
62
+
63
+
64
class LocalStorage(StorageInterface):
    """StorageInterface implementation backed by the local filesystem."""

    @staticmethod
    def _ensure_parent_dir(file_path: str) -> None:
        # BUGFIX: os.path.dirname() is '' for a bare filename and
        # os.makedirs('') raises FileNotFoundError - only create a parent
        # directory when the path actually has one.
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def save_file(self, file_path: str, content: bytes) -> str:
        """Write *content* to *file_path* (parents auto-created); return the path."""
        self._ensure_parent_dir(file_path)
        with open(file_path, 'wb') as f:
            f.write(content)
        return file_path

    def load_file(self, file_path: str) -> bytes:
        """Read and return the file's full contents."""
        with open(file_path, 'rb') as f:
            return f.read()

    def list_files(self, directory: str) -> list[str]:
        """Names (not full paths) of the regular files directly inside *directory*."""
        return [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]

    def file_exists(self, file_path: str) -> bool:
        """True when the path exists (file or directory)."""
        return os.path.exists(file_path)

    def delete_file(self, file_path: str) -> None:
        """Remove the file; raises FileNotFoundError when absent."""
        os.remove(file_path)

    def create_directory(self, directory: str) -> None:
        """Create the directory (and parents); no-op when it already exists."""
        os.makedirs(directory, exist_ok=True)

    def delete_directory(self, directory: str) -> None:
        """Recursively remove the directory tree."""
        shutil.rmtree(directory)

    def upload(self, local_path: str, destination_path: str) -> None:
        """Copy a local file to *destination_path*, creating parent dirs as needed."""
        self._ensure_parent_dir(destination_path)
        shutil.copy(local_path, destination_path)

    def append_file(self, file_path: str, content: bytes) -> None:
        """Append *content* to the file, creating it (and parents) if needed."""
        self._ensure_parent_dir(file_path)
        with open(file_path, 'ab') as f:
            f.write(content)

    def get_modified_time(self, file_path: str) -> float:
        """Last-modification time as a UNIX timestamp."""
        return os.path.getmtime(file_path)

    def directory_exists(self, directory: str) -> bool:
        """True when the path exists; does not verify it is actually a directory."""
        return self.file_exists(directory)
105
+
106
+
107
class BlobStorage(StorageInterface):
    """
    Writes to Azure Blob Storage, using local disk as a read cache.

    Every write goes to the blob container first and is then mirrored to the
    same relative path on local disk, so reads can usually be served locally.

    TODO: Allow configuration of temp dir instead of just using the same paths in both local and remote
    """

    def __init__(self, connection_string: str, container_name: str):
        self.blob_service_client = BlobServiceClient.from_connection_string(connection_string)
        self.container_client = self.blob_service_client.get_container_client(container_name)
        self.local_storage = LocalStorage()

    def download(self, file_path: str) -> bytes:
        """Fetch a blob's full contents from the container."""
        blob_client = self.container_client.get_blob_client(file_path)
        return blob_client.download_blob().readall()

    def sync(self, file_path: str) -> None:
        """Ensure the local cached copy of *file_path* is at least as new as the blob."""
        if not self.local_storage.file_exists(file_path):
            print(f"DEBUG: missing local version of {file_path} - downloading")
            self.local_storage.save_file(file_path, self.download(file_path))
        else:
            local_timestamp = self.local_storage.get_modified_time(file_path)
            remote_timestamp = self.get_modified_time(file_path)
            if local_timestamp < remote_timestamp:
                # We always write remotely before writing locally, so normally
                # local_timestamp >= remote_timestamp; an older local copy means
                # another writer updated the blob and we must re-download.
                # (BUGFIX: log prefix was misspelled "DBEUG".)
                print(f"DEBUG: local version of {file_path} out of date - downloading")
                self.local_storage.save_file(file_path, self.download(file_path))

    def save_file(self, file_path: str, content: bytes) -> str:
        """Upload *content* (overwriting), mirror it locally, return the path."""
        blob_client = self.container_client.get_blob_client(file_path)
        blob_client.upload_blob(content, overwrite=True)
        self.local_storage.save_file(file_path, content)
        return file_path

    def load_file(self, file_path: str) -> bytes:
        """Read via the local cache, refreshing it from the blob when stale."""
        self.sync(file_path)
        return self.local_storage.load_file(file_path)

    def list_files(self, directory: str) -> list[str]:
        """Blob names under the *directory* prefix (remote listing only)."""
        return [blob.name for blob in self.container_client.list_blobs(name_starts_with=directory)]

    def file_exists(self, file_path: str) -> bool:
        """True when the blob exists remotely (the local cache is not consulted)."""
        blob_client = self.container_client.get_blob_client(file_path)
        return blob_client.exists()

    def delete_file(self, file_path: str) -> None:
        """Delete the local cached copy and then the blob."""
        self.local_storage.delete_file(file_path)
        blob_client = self.container_client.get_blob_client(file_path)
        blob_client.delete_blob()

    def create_directory(self, directory: str) -> None:
        # Blob storage doesn't have directories, so only create it locally
        self.local_storage.create_directory(directory)

    def delete_directory(self, directory: str) -> None:
        """Remove the local directory tree and every blob under the prefix."""
        self.local_storage.delete_directory(directory)
        blobs_to_delete = self.container_client.list_blobs(name_starts_with=directory)
        for blob in blobs_to_delete:
            self.container_client.delete_blob(blob.name)

    def upload(self, local_path: str, destination_path: str) -> None:
        """Stream a local file into the container, then mirror it into the cache."""
        with open(local_path, "rb") as data:
            blob_client = self.container_client.get_blob_client(destination_path)
            blob_client.upload_blob(data, overwrite=True)
        self.local_storage.upload(local_path, destination_path)

    def append_file(self, file_path: str, content: bytes) -> None:
        """Append to an append-blob (creating it if needed) and to the local copy."""
        blob_client = self.container_client.get_blob_client(file_path)
        if not blob_client.exists():
            blob_client.create_append_blob()
        else:
            self.sync(file_path)

        blob_client.append_block(content)
        self.local_storage.append_file(file_path, content)

    def get_modified_time(self, file_path: str) -> float:
        """Blob last-modified time as a UNIX timestamp."""
        blob_client = self.container_client.get_blob_client(file_path)
        properties = blob_client.get_blob_properties()
        # Convert the UTC datetime to a UNIX timestamp
        return properties.last_modified.timestamp()

    def directory_exists(self, directory: str) -> bool:
        """True when at least one blob exists under the *directory* prefix."""
        blobs = self.container_client.list_blobs(name_starts_with=directory)
        return next(blobs, None) is not None
192
+
193
+
194
class StorageFactory:
    """Builds the storage backend selected by the STORAGE_TYPE env var."""

    @staticmethod
    def get_storage() -> StorageInterface:
        """Return a LocalStorage ('local', the default) or BlobStorage ('blob')."""
        selected = os.getenv('STORAGE_TYPE', 'local').lower()
        if selected == 'blob':
            conn = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
            container = os.getenv('AZURE_STORAGE_CONTAINER_NAME')
            if not (conn and container):
                raise ValueError("Azure Blob Storage connection string and container name must be set")
            return BlobStorage(conn, container)
        if selected == 'local':
            return LocalStorage()
        raise ValueError(f"Unsupported storage type: {selected}")
208
+
symbol_detection.py ADDED
@@ -0,0 +1,454 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import json
3
+ import uuid
4
+ import os
5
+ import logging
6
+ from ultralytics import YOLO
7
+ from tqdm import tqdm
8
+ from storage import StorageInterface
9
+ import numpy as np
10
+ from typing import Tuple, List, Dict, Any
11
+
12
+ # Configure logging
13
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
+
15
+ # Constants
16
+ MODEL_PATHS = {
17
+ "model1": "models/Intui_SDM_41.pt",
18
+ "model2": "models/Intui_SDM_20.pt" # Add your second model path here
19
+ }
20
+ MAX_DIMENSION = 1280
21
+ CONFIDENCE_THRESHOLDS = [0.1, 0.3, 0.5, 0.7, 0.9]
22
+ TEXT_COLOR = (0, 0, 255) # Red color for text
23
+ BOX_COLOR = (255, 0, 0) # Red color for box (no transparency)
24
+ BG_COLOR = (255, 255, 255, 0.6) # Semi-transparent white for text background
25
+ THICKNESS = 1 # Thin text thickness
26
+ BOX_THICKNESS = 2 # Box line thickness
27
+ MIN_FONT_SCALE = 0.2 # Minimum font scale
28
+ MAX_FONT_SCALE = 1.0 # Maximum font scale
29
+ TEXT_PADDING = 20 # Increased padding between text elements
30
+ OVERLAP_THRESHOLD = 0.3 # Threshold for detecting text overlap
31
+
32
def preprocess_image_for_symbol_detection(image_cv: np.ndarray) -> np.ndarray:
    """Edge-enhance a BGR image for symbol detection.

    Pipeline: grayscale -> histogram equalization -> edge-preserving
    bilateral filter -> Canny edges, converted back to 3-channel BGR so it
    can be fed to the detector unchanged.
    """
    as_gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
    contrast_boosted = cv2.equalizeHist(as_gray)
    smoothed = cv2.bilateralFilter(contrast_boosted, 9, 75, 75)
    edge_map = cv2.Canny(smoothed, 100, 200)
    return cv2.cvtColor(edge_map, cv2.COLOR_GRAY2BGR)
40
+
41
def evaluate_detections(detections_list: List[Dict[str, Any]]) -> int:
    """Score a detection set; the current metric is simply the detection count."""
    return len(detections_list)
44
+
45
def resize_image_with_aspect_ratio(image_cv: np.ndarray, max_dimension: int) -> Tuple[np.ndarray, int, int]:
    """Shrink the image so its longer side is at most *max_dimension*.

    Aspect ratio is preserved and the image is never upscaled.  Returns the
    (possibly resized) image together with its final width and height.
    """
    height, width = image_cv.shape[:2]
    longest_side = max(width, height)
    if longest_side <= max_dimension:
        return image_cv, width, height
    scale = max_dimension / float(longest_side)
    target_w, target_h = int(width * scale), int(height * scale)
    resized = cv2.resize(image_cv, (target_w, target_h), interpolation=cv2.INTER_LINEAR)
    return resized, target_w, target_h
56
+
57
def merge_detections(all_detections: List[Dict]) -> List[Dict]:
    """
    Merge detections from all models, keeping only the highest confidence
    detection when duplicates (same label, IoU > 0.5) are found.

    Returns a NEW list sorted by descending confidence.  BUGFIX: previously
    the caller's list was sorted in place as a side effect; we now sort a
    copy and leave the input untouched.
    """
    if not all_detections:
        return []

    # Rank by confidence so any suppressed duplicate always has the lower score.
    ranked = sorted(all_detections, key=lambda d: d['confidence'], reverse=True)
    keep = [True] * len(ranked)

    def _iou(a, b):
        """Intersection-over-Union of two [x1, y1, x2, y2] boxes."""
        ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
        ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
        inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
        union = ((a[2] - a[0]) * (a[3] - a[1])
                 + (b[2] - b[0]) * (b[3] - b[1])
                 - inter)
        return inter / union if union > 0 else 0

    # Greedy NMS restricted to detections sharing the same original label.
    for i, det in enumerate(ranked):
        if not keep[i]:
            continue
        for j in range(i + 1, len(ranked)):
            if not keep[j]:
                continue
            other = ranked[j]
            if (other['original_label'] == det['original_label'] and
                    _iou(det['bbox'], other['bbox']) > 0.5):
                # ranked[] is confidence-sorted, so j is always the weaker one.
                keep[j] = False
                logging.info(f"Removing duplicate detection of {det['original_label']} with lower confidence "
                             f"({other['confidence']:.2f} < {det['confidence']:.2f})")

    return [det for det, kept in zip(ranked, keep) if kept]
108
+
109
def calculate_font_scale(image_width: int, bbox_width: int) -> float:
    """
    Calculate an annotation font scale from the image and bbox widths.

    The scale grows with both the image width (relative to MAX_DIMENSION)
    and the bbox width (relative to the image), with floors so tiny boxes
    stay readable, and is clamped to [MIN_FONT_SCALE, MAX_FONT_SCALE].
    """
    base_scale = 0.7  # generous base for visibility
    width_factor = max(image_width / MAX_DIMENSION, 0.5)
    bbox_factor = max((bbox_width / image_width) * 6, 0.7)
    adaptive = base_scale * width_factor * bbox_factor
    return min(max(adaptive, MIN_FONT_SCALE), MAX_FONT_SCALE)
124
+
125
def check_overlap(rect1, rect2):
    """Return True when two (x1, y1, x2, y2) rectangles intersect or touch."""
    ax1, ay1, ax2, ay2 = rect1
    bx1, by1, bx2, by2 = rect2
    # De Morgan form of "not (entirely left/right/above/below)".
    return ax1 <= bx2 and bx1 <= ax2 and ay1 <= by2 and by1 <= ay2
131
+
132
def draw_annotation(
    image: np.ndarray,
    bbox: List[int],
    text: str,
    confidence: float,
    model_source: str,
    existing_annotations: List[tuple] = None
) -> None:
    """
    Draw a detection box and its label text on *image* in place.

    Candidate label positions above/below the box are tried in order and the
    first one that fits inside the image without overlapping a previously
    placed label is used; otherwise the label falls back to the right-hand
    side of the box.  *existing_annotations* accumulates placed label
    rectangles across calls so successive labels avoid each other.

    Args:
        image: BGR image, modified in place.
        bbox: [x1, y1, x2, y2] detection box in image pixels.
        text: label text (first line of the annotation).
        confidence: confidence in percent (rendered as "NN%").
        model_source: accepted for interface compatibility; not rendered.
        existing_annotations: mutable list of (x1, y1, x2, y2) label rects.
    """
    if existing_annotations is None:
        existing_annotations = []

    x1, y1, x2, y2 = bbox
    bbox_width = x2 - x1
    image_width = image.shape[1]
    image_height = image.shape[0]

    font_scale = calculate_font_scale(image_width, bbox_width)

    # Two-line annotation: label on top, confidence percentage below.
    annotation_text = f'{text}\n{confidence:.0f}%'
    lines = annotation_text.split('\n')

    # Measure each line to size the label rectangle.
    font = cv2.FONT_HERSHEY_SIMPLEX
    max_width = 0
    total_height = 0
    line_heights = []
    for line in lines:
        (width, height), baseline = cv2.getTextSize(
            line, font, font_scale, THICKNESS
        )
        max_width = max(max_width, width)
        line_height = height + baseline + TEXT_PADDING
        line_heights.append(line_height)
        total_height += line_height

    padding = TEXT_PADDING
    rect_x1 = max(0, x1 - padding)
    rect_x2 = min(image_width, x1 + max_width + padding * 2)

    # Candidate vertical placements, in order of preference.
    positions = [
        ('top', y1 - total_height - padding),
        ('bottom', y2 + padding),
        ('top_shifted', y1 - total_height - padding * 2),
        ('bottom_shifted', y2 + padding * 2)
    ]

    final_position = None
    for pos_name, y_pos in positions:
        if y_pos < 0 or y_pos + total_height > image_height:
            continue  # would fall outside the image
        rect = (rect_x1, y_pos, rect_x2, y_pos + total_height)
        if not any(check_overlap(rect, placed) for placed in existing_annotations):
            final_position = (pos_name, y_pos)
            existing_annotations.append(rect)
            break

    # Fallback: place the label to the right of the bbox.
    if final_position is None:
        rect_x1 = max(0, x1 + bbox_width + padding)
        rect_x2 = min(image_width, rect_x1 + max_width + padding * 2)
        y_pos = y1
        final_position = ('side', y_pos)
        # BUGFIX: the fallback rectangle was never recorded, so later labels
        # could be placed on top of it.  Register it like the other positions.
        existing_annotations.append((rect_x1, y_pos, rect_x2, y_pos + total_height))

    rect_y1 = final_position[1]

    # Draw bounding box (no transparency).
    cv2.rectangle(image, (x1, y1), (x2, y2), BOX_COLOR, BOX_THICKNESS)

    # Draw the text lines directly, without a background rectangle.
    text_y = rect_y1 + line_heights[0] - padding
    for i, line in enumerate(lines):
        cv2.putText(
            image,
            line,
            (rect_x1 + padding, text_y + sum(line_heights[:i])),
            font,
            font_scale,
            TEXT_COLOR,
            THICKNESS,
            cv2.LINE_AA
        )
230
+
231
def run_detection_with_optimal_threshold(
    image_path: str,
    results_dir: str = "results",
    file_name: str = "",
    apply_preprocessing: bool = False,
    resize_image: bool = True,
    storage: StorageInterface = None
) -> Tuple[str, str, str, List[int]]:
    """Run symbol detection with every configured YOLO model and merge results.

    The image is loaded through *storage* (required), optionally resized to
    MAX_DIMENSION and edge-preprocessed, and each model in MODEL_PATHS is run
    ONCE.  Each threshold in CONFIDENCE_THRESHOLDS is then applied as a pure
    filter over that model's detections and the best-scoring set (per
    evaluate_detections) is kept.  All models' detections are de-duplicated
    with merge_detections, drawn onto the original-resolution image, and an
    annotated PNG plus a JSON summary are written under *results_dir*.

    Returns:
        (annotated_image_path, json_path, log_message, diagram_bbox) on
        success, or ("Error during detection", None, None, None) on failure.
    """
    try:
        # Decode the input image via the storage backend.
        image_data = storage.load_file(image_path)
        nparr = np.frombuffer(image_data, np.uint8)
        original_image_cv = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        image_cv = original_image_cv.copy()

        if resize_image:
            logging.info("Resizing image for detection with aspect ratio...")
            image_cv, resized_width, resized_height = resize_image_with_aspect_ratio(image_cv, MAX_DIMENSION)
        else:
            logging.info("Skipping image resizing...")
            resized_height, resized_width = original_image_cv.shape[:2]

        if apply_preprocessing:
            logging.info("Preprocessing image for symbol detection...")
            image_cv = preprocess_image_for_symbol_detection(image_cv)
        else:
            logging.info("Skipping image preprocessing for symbol detection...")

        # Scale factors from the (possibly resized) inference image back to
        # original-image pixel coordinates.  Loop-invariant, so hoisted.
        scale_x = original_image_cv.shape[1] / resized_width
        scale_y = original_image_cv.shape[0] / resized_height

        all_detections = []
        for model_name, model_path in MODEL_PATHS.items():
            logging.info(f"Running detection with model: {model_name}")
            if not model_path:
                logging.warning(f"No model path found for {model_name}")
                continue

            model = YOLO(model_path)

            # PERF FIX: run inference ONCE per model.  The previous version
            # called model.predict() again for every confidence threshold even
            # though the call was identical each time - thresholding is a pure
            # filter on the returned confidences, so 4 of the 5 passes were
            # wasted inference.
            results = model.predict(source=image_cv, imgsz=MAX_DIMENSION)

            candidates = []
            for result in results:
                for box in result.boxes:
                    confidence = float(box.conf[0])
                    x1, y1, x2, y2 = map(float, box.xyxy[0])
                    class_id = int(box.cls[0])
                    label = result.names[class_id]

                    # Map the bbox back to original-image coordinates.
                    x1, x2 = x1 * scale_x, x2 * scale_x
                    y1, y2 = y1 * scale_y, y2 * scale_y
                    x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])

                    # Labels follow "<category>_<type>_<name...>" with shorter
                    # fallbacks for 2- and 1-part labels.
                    split_label = label.split('_')
                    if len(split_label) >= 3:
                        category = split_label[0]
                        type_ = split_label[1]
                        new_label = '_'.join(split_label[2:])
                    elif len(split_label) == 2:
                        category = split_label[0]
                        type_ = split_label[1]
                        new_label = split_label[1]
                    elif len(split_label) == 1:
                        category = split_label[0]
                        type_ = "Unknown"
                        new_label = split_label[0]
                    else:
                        logging.warning(f"Unexpected label format: {label}. Skipping this detection.")
                        continue

                    candidates.append({
                        "symbol_id": str(uuid.uuid4()),
                        "class_id": class_id,
                        "original_label": label,
                        "category": category,
                        "type": type_,
                        "label": new_label,
                        "confidence": confidence,
                        "bbox": [x1, y1, x2, y2],
                        "model_source": model_name
                    })

            # Keep the threshold whose filtered set scores best.
            best_detections_list = []
            best_metric = -1
            for confidence_threshold in CONFIDENCE_THRESHOLDS:
                logging.info(f"Evaluating confidence threshold: {confidence_threshold}...")
                detections_list = [d for d in candidates if d['confidence'] >= confidence_threshold]
                metric = evaluate_detections(detections_list)
                if metric > best_metric:
                    best_metric = metric
                    best_detections_list = detections_list

            all_detections.extend(best_detections_list)

        # Merge detections from all models.
        merged_detections = merge_detections(all_detections)
        logging.info(f"Total detections after merging: {len(merged_detections)}")

        # Annotate the original-resolution image in place.
        existing_annotations = []
        for det in merged_detections:
            draw_annotation(
                original_image_cv,
                det["bbox"],
                det["original_label"],
                det["confidence"] * 100,
                det["model_source"],
                existing_annotations
            )

        storage.create_directory(results_dir)
        file_name_without_extension = os.path.splitext(file_name)[0]

        # Build the output JSON: totals, per-label counts, and raw detections.
        total_detected_symbols = len(merged_detections)
        class_counts = {}
        for det in merged_detections:
            full_label = det["original_label"]
            class_counts[full_label] = class_counts.get(full_label, 0) + 1

        output_json = {
            "total_detected_symbols": total_detected_symbols,
            "details": class_counts,
            "detections": merged_detections
        }

        detection_json_path = os.path.join(
            results_dir, f'{file_name_without_extension}_detected_symbols.json'
        )
        storage.save_file(
            detection_json_path,
            json.dumps(output_json, indent=4).encode('utf-8')
        )

        # Save the annotated image as an uncompressed PNG (lossless).
        detection_image_path = os.path.join(
            results_dir, f'{file_name_without_extension}_detected_symbols.png'
        )
        _, img_encoded = cv2.imencode(
            '.png',
            original_image_cv,
            [cv2.IMWRITE_PNG_COMPRESSION, 0]
        )
        storage.save_file(detection_image_path, img_encoded.tobytes())

        # Overall extent of all detections ([0, 0, 0, 0] when there are none).
        diagram_bbox = [
            min([det['bbox'][0] for det in merged_detections], default=0),
            min([det['bbox'][1] for det in merged_detections], default=0),
            max([det['bbox'][2] for det in merged_detections], default=0),
            max([det['bbox'][3] for det in merged_detections], default=0)
        ]

        # NOTE: the old post-save upscaling of the annotated image was dead
        # code (the upscaled copy was never saved or returned) and was removed.

        return (
            detection_image_path,
            detection_json_path,
            f"Total detections after merging: {total_detected_symbols}",
            diagram_bbox
        )
    except Exception as e:
        logging.error(f"An error occurred: {e}")
        return "Error during detection", None, None, None
425
+
426
def _main() -> None:
    """Smoke-test entry point: run symbol detection on one sample drawing."""
    from storage import StorageFactory

    sample_path = "processed_pages/10219-1-DG-BC-00011.01-REV_A_page_1_text.png"
    backend = StorageFactory.get_storage()

    image_path, json_path, log_message, bbox = run_detection_with_optimal_threshold(
        sample_path,
        results_dir="results",
        file_name=os.path.basename(sample_path),
        apply_preprocessing=False,
        resize_image=True,
        storage=backend,
    )

    logging.info("Detection Image Path: %s", image_path)
    logging.info("Detection JSON Path: %s", json_path)
    logging.info("Detection Log Message: %s", log_message)
    logging.info("Diagram BBox: %s", bbox)
    logging.info("Done!")


if __name__ == "__main__":
    _main()
text_detection_combined.py ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import io
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ import numpy as np
6
+ from doctr.models import ocr_predictor
7
+ import pytesseract
8
+ import easyocr
9
+ from storage import StorageInterface
10
+ import re
11
+ import logging
12
+ from pathlib import Path
13
+ import cv2
14
+ import traceback
15
+ from typing import Tuple
16
+
17
+ # Initialize models
18
+ try:
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+ doctr_model = ocr_predictor(pretrained=True)
22
+ easyocr_reader = easyocr.Reader(['en'])
23
+ logging.info("All OCR models loaded successfully")
24
+ except Exception as e:
25
+ logging.error(f"Error loading OCR models: {e}")
26
+
27
+ # Combined patterns from all approaches
28
+ TEXT_PATTERNS = {
29
+ 'Line_Number': r"(?:\d{1,5}[-](?:[A-Z]{2,4})[-]\d{1,3})",
30
+ 'Equipment_Tag': r"(?:[A-Z]{1,3}[-][A-Z0-9]{1,4}[-]\d{1,3})",
31
+ 'Instrument_Tag': r"(?:\d{2,3}[-][A-Z]{2,4}[-]\d{2,3})",
32
+ 'Valve_Number': r"(?:[A-Z]{1,2}[-]\d{3})",
33
+ 'Pipe_Size': r"(?:\d{1,2}[\"])",
34
+ 'Flow_Direction': r"(?:FROM|TO)",
35
+ 'Service_Description': r"(?:STEAM|WATER|AIR|GAS|DRAIN)",
36
+ 'Process_Instrument': r"(?:[0-9]{2,3}(?:-[A-Z]{2,3})?-[0-9]{2,3}|[A-Z]{2,3}-[0-9]{2,3})",
37
+ 'Nozzle': r"(?:N[0-9]{1,2}|MH)",
38
+ 'Pipe_Connector': r"(?:[0-9]{1,5}|[A-Z]{1,2}[0-9]{2,5})"
39
+ }
40
+
41
def detect_text_combined(image, confidence_threshold=0.3):
    """Run Tesseract, EasyOCR and DocTR on *image* and fuse their output.

    Each raw detection is tagged with the engine that produced it,
    overlapping hits are merged, and only merged detections at or above
    *confidence_threshold* are kept, each classified via classify_text().
    """
    engines = (
        ('tesseract', detect_with_tesseract),
        ('easyocr', detect_with_easyocr),
        ('doctr', detect_with_doctr),
    )

    # Collect every engine's detections, tagged with their source.
    raw_detections = []
    for engine_name, detect in engines:
        for detection in detect(image):
            detection['source'] = engine_name
            raw_detections.append(detection)

    # Collapse overlapping detections across engines.
    merged = merge_overlapping_detections(raw_detections)

    # Keep confident detections only, classifying each one.
    classified = []
    for detection in merged:
        if detection['confidence'] < confidence_threshold:
            continue
        detection['text_type'] = classify_text(detection['text'])
        classified.append(detection)

    return classified
75
+
76
def generate_detailed_summary(results):
    """Build an aggregate report over classified OCR detections.

    Returns a dict with overall counts, per-pattern-type statistics
    (counts, per-source breakdown, average confidence, raw items),
    per-source statistics, a confidence histogram, and the flat list of
    detected items.  Each input result must carry 'text', 'text_type',
    'confidence', 'source' and 'bbox'.
    """
    sources = ('tesseract', 'easyocr', 'doctr')

    summary = {
        'total_detections': len(results),
        'by_type': {
            pattern: {
                'count': 0,
                'avg_confidence': 0.0,
                'by_source': {src: 0 for src in sources},
                'items': [],
            }
            for pattern in TEXT_PATTERNS
        },
        'by_source': {
            src: {
                'count': 0,
                'by_type': {pattern: 0 for pattern in TEXT_PATTERNS},
                'avg_confidence': 0.0,
            }
            for src in sources
        },
        'confidence_ranges': {
            '0.9-1.0': 0,
            '0.8-0.9': 0,
            '0.7-0.8': 0,
            '0.6-0.7': 0,
            '0.5-0.6': 0,
            '<0.5': 0,
        },
        'detected_items': [],
    }

    # Histogram buckets: first lower bound that the confidence reaches wins.
    confidence_buckets = (
        (0.9, '0.9-1.0'),
        (0.8, '0.8-0.9'),
        (0.7, '0.7-0.8'),
        (0.6, '0.6-0.7'),
        (0.5, '0.5-0.6'),
    )
    per_source_confidences = {src: [] for src in sources}

    for result in results:
        source = result['source']
        conf = result['confidence']
        text_type = result['text_type']

        summary['by_source'][source]['count'] += 1
        per_source_confidences[source].append(conf)

        for lower_bound, bucket in confidence_buckets:
            if conf >= lower_bound:
                summary['confidence_ranges'][bucket] += 1
                break
        else:
            summary['confidence_ranges']['<0.5'] += 1

        # Only known pattern types get per-type statistics.
        if text_type in summary['by_type']:
            type_stats = summary['by_type'][text_type]
            type_stats['count'] += 1
            type_stats['by_source'][source] += 1
            summary['by_source'][source]['by_type'][text_type] += 1
            type_stats['items'].append({
                'text': result['text'],
                'confidence': conf,
                'source': source,
                'bbox': result['bbox'],
            })

        summary['detected_items'].append({
            'text': result['text'],
            'type': text_type,
            'confidence': conf,
            'source': source,
            'bbox': result['bbox'],
        })

    # Per-source average confidences.
    for source, confs in per_source_confidences.items():
        if confs:
            summary['by_source'][source]['avg_confidence'] = sum(confs) / len(confs)

    # Per-type average confidences.
    for type_stats in summary['by_type'].values():
        if type_stats['items']:
            type_stats['avg_confidence'] = (
                sum(item['confidence'] for item in type_stats['items'])
                / len(type_stats['items'])
            )

    return summary
179
+
180
def process_drawing(image_path: str, output_dir: str, storage: StorageInterface) -> Tuple[dict, dict]:
    """Run multi-engine text detection over a drawing and persist the results.

    Runs Tesseract, EasyOCR and DocTR on the image, draws every detection on
    an annotated copy, and writes both the annotated PNG and a JSON report
    into ``output_dir``.

    Args:
        image_path: Path to image file.
        output_dir: Directory to save results.
        storage: Optional storage handler (currently unused by this function).

    Returns:
        Tuple of (paths/detections dict, summary statistics dict).

    Raises:
        ValueError: if the image cannot be read.
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError(f"Could not read image: {image_path}")

        # Detections are drawn on a copy so the source image stays untouched.
        annotated_image = image.copy()

        text_results = {'file_name': image_path, 'detections': []}

        # NOTE(review): 'by_type' is initialized here but never updated below —
        # presumably filled by a later classification pass; confirm.
        text_summary = {
            'total_detections': 0,
            'by_source': {
                'tesseract': {'count': 0, 'avg_confidence': 0.0},
                'easyocr': {'count': 0, 'avg_confidence': 0.0},
                'doctr': {'count': 0, 'avg_confidence': 0.0}
            },
            'by_type': {
                'equipment_tag': {'count': 0, 'avg_confidence': 0.0},
                'line_number': {'count': 0, 'avg_confidence': 0.0},
                'instrument_tag': {'count': 0, 'avg_confidence': 0.0},
                'valve_number': {'count': 0, 'avg_confidence': 0.0},
                'pipe_size': {'count': 0, 'avg_confidence': 0.0},
                'flow_direction': {'count': 0, 'avg_confidence': 0.0},
                'service_description': {'count': 0, 'avg_confidence': 0.0},
                'process_instrument': {'count': 0, 'avg_confidence': 0.0},
                'nozzle': {'count': 0, 'avg_confidence': 0.0},
                'pipe_connector': {'count': 0, 'avg_confidence': 0.0},
                'other': {'count': 0, 'avg_confidence': 0.0}
            }
        }

        # Each engine's output is tagged with its source name; order matters
        # for the resulting detection list.
        engine_outputs = [
            ('tesseract', detect_with_tesseract(image)),
            ('easyocr', detect_with_easyocr(image)),
            ('doctr', detect_with_doctr(image)),
        ]

        for source, detections in engine_outputs:
            for detection in detections:
                text_results['detections'].append({
                    'text': detection['text'],
                    'bbox': detection['bbox'],
                    'confidence': detection['confidence'],
                    'source': source
                })

                # Accumulate the raw confidence sum; converted to an average
                # once all detections are counted.
                text_summary['total_detections'] += 1
                source_stats = text_summary['by_source'][source]
                source_stats['count'] += 1
                source_stats['avg_confidence'] += detection['confidence']

                # Draw the detection box and its text on the annotated copy.
                x1, y1, x2, y2 = detection['bbox']
                cv2.rectangle(annotated_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
                cv2.putText(annotated_image, detection['text'], (int(x1), int(y1)-5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        # Turn the accumulated confidence sums into averages.
        for source_stats in text_summary['by_source'].values():
            if source_stats['count'] > 0:
                source_stats['avg_confidence'] /= source_stats['count']

        # Output naming convention: strip any 'display_' prefix.
        base_name = Path(image_path).stem
        if base_name.startswith('display_'):
            base_name = base_name[len('display_'):]

        text_result_image_path = os.path.join(output_dir, f"{base_name}_detected_texts.png")
        text_result_json_path = os.path.join(output_dir, f"{base_name}_detected_texts.json")

        cv2.imwrite(text_result_image_path, annotated_image)

        with open(text_result_json_path, 'w', encoding='utf-8') as f:
            json.dump({
                'file_name': image_path,
                'summary': text_summary,
                'detections': text_results['detections']
            }, f, indent=4, ensure_ascii=False)

        return {
            'image_path': text_result_image_path,
            'json_path': text_result_json_path,
            'results': text_results
        }, text_summary

    except Exception as e:
        logger.error(f"Text detection error: {str(e)}")
        raise
290
+
291
def detect_with_tesseract(image):
    """Detect text using Tesseract OCR.

    Returns a list of dicts with 'text', 'bbox' ([x1, y1, x2, y2]) and
    'confidence' (scaled to 0..1); [] on any Tesseract failure.
    """
    # PSM 11 = sparse text; charset restricted to tag-style characters.
    custom_config = r'--oem 3 --psm 11 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-.()" -c tessedit_write_images=true -c textord_heavy_nr=true -c textord_min_linesize=3'

    try:
        data = pytesseract.image_to_data(
            image,
            config=custom_config,
            output_type=pytesseract.Output.DICT
        )

        detections = []
        rows = zip(data['text'], data['conf'], data['left'], data['top'],
                   data['width'], data['height'])
        for raw_text, raw_conf, x, y, w, h in rows:
            conf = float(raw_conf)
            # Low threshold on purpose: technical text scores poorly.
            if conf <= 30:
                continue
            cleaned = raw_text.strip()
            if not cleaned:
                continue
            detections.append({
                'text': cleaned,
                'bbox': [x, y, x + w, y + h],
                'confidence': conf / 100.0
            })
        return detections

    except Exception as e:
        logger.error(f"Tesseract error: {str(e)}")
        return []
320
+
321
def detect_with_easyocr(image):
    """Detect text using the module-level EasyOCR reader.

    Returns a list of dicts with 'text', axis-aligned integer 'bbox'
    ([x1, y1, x2, y2]) and 'confidence'; [] if the reader is missing
    or raises.
    """
    if easyocr_reader is None:
        return []

    try:
        raw = easyocr_reader.readtext(
            np.array(image),
            paragraph=False,
            height_ths=2.0,
            width_ths=2.0,
            contrast_ths=0.2,
            text_threshold=0.5
        )

        detections = []
        for corners, text, conf in raw:
            # EasyOCR returns a 4-point polygon; collapse it to its
            # axis-aligned bounding rectangle.
            xs = [point[0] for point in corners]
            ys = [point[1] for point in corners]
            detections.append({
                'text': text,
                'bbox': [int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))],
                'confidence': conf
            })
        return detections

    except Exception as e:
        logger.error(f"EasyOCR error: {str(e)}")
        return []
351
+
352
def detect_with_doctr(image):
    """Detect text using DocTR.

    Runs the module-level ``doctr_model`` on the image and flattens the
    page/block/line/word hierarchy into a list of dicts with 'text',
    'bbox' ([x1, y1, x2, y2] in absolute pixels) and 'confidence'
    (defaults to 0.5 when DocTR omits it). Returns [] on any failure.
    """
    try:
        image_np = np.array(image)

        # Hoisted out of the word loops: the image size (and hence the
        # normalization scale) is invariant — recomputing it per word was
        # pure overhead.
        height, width = image_np.shape[:2]
        scale = np.array([width, height])

        result = doctr_model([image_np])
        doc = result.export()

        results = []
        for page in doc['pages']:
            for block in page['blocks']:
                for line in block['lines']:
                    for word in line['words']:
                        # DocTR geometry is normalized to [0, 1]; convert
                        # to absolute pixel coordinates.
                        points = np.array(word['geometry']) * scale
                        x1, y1 = points.min(axis=0)
                        x2, y2 = points.max(axis=0)

                        results.append({
                            'text': word['value'],
                            'bbox': [int(x1), int(y1), int(x2), int(y2)],
                            'confidence': word.get('confidence', 0.5)
                        })
        return results

    except Exception as e:
        logger.error(f"DocTR error: {str(e)}")
        return []
384
+
385
def merge_overlapping_detections(results, iou_threshold=0.5):
    """Collapse overlapping detections from different sources.

    Detections are grouped greedily: each unclaimed detection becomes an
    anchor, and every later unclaimed detection whose IoU with the *anchor*
    exceeds ``iou_threshold`` joins its group. The highest-confidence member
    of each group is kept.
    """
    if not results:
        return []

    def _iou(a, b):
        # Intersection rectangle of the two axis-aligned boxes.
        left = max(a[0], b[0])
        top = max(a[1], b[1])
        right = min(a[2], b[2])
        bottom = min(a[3], b[3])

        if right < left or bottom < top:
            return 0.0

        inter = (right - left) * (bottom - top)
        union = ((a[2] - a[0]) * (a[3] - a[1])
                 + (b[2] - b[0]) * (b[3] - b[1])
                 - inter)
        return inter / union if union > 0 else 0

    consumed = set()
    kept = []

    for anchor_idx, anchor in enumerate(results):
        if anchor_idx in consumed:
            continue

        consumed.add(anchor_idx)
        group = [anchor]

        for other_idx, other in enumerate(results):
            if other_idx in consumed:
                continue
            if _iou(anchor['bbox'], other['bbox']) > iou_threshold:
                group.append(other)
                consumed.add(other_idx)

        # Singleton groups pass through; otherwise keep the best-scoring one.
        if len(group) == 1:
            kept.append(group[0])
        else:
            kept.append(max(group, key=lambda d: d['confidence']))

    return kept
432
+
433
def classify_text(text):
    """Classify a text string against the known tag/number patterns.

    Returns the first matching pattern name from TEXT_PATTERNS, or
    'Unknown' for empty/falsy input or no match.
    """
    if not text:
        return 'Unknown'

    # Normalize: uppercase and strip all internal whitespace before matching.
    normalized = re.sub(r'\s+', '', text.strip().upper())

    return next(
        (name for name, pattern in TEXT_PATTERNS.items()
         if re.match(pattern, normalized)),
        'Unknown',
    )
447
+
448
def annotate_image(image, results):
    """Draw a labelled bounding box for each detection onto the image.

    Returns the (possibly RGB-converted) annotated PIL image.
    """
    # Colored boxes require an RGB image.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    draw = ImageDraw.Draw(image)
    try:
        font = ImageFont.truetype("arial.ttf", 20)
    except IOError:
        # Arial not installed — fall back to PIL's built-in bitmap font.
        font = ImageFont.load_default()

    # Per-type box/label colors; unrecognized types use the 'Unknown' color.
    colors = {
        'Line_Number': "#FF0000",         # Bright Red
        'Equipment_Tag': "#00FF00",       # Bright Green
        'Instrument_Tag': "#0000FF",      # Bright Blue
        'Valve_Number': "#FFA500",        # Bright Orange
        'Pipe_Size': "#FF00FF",           # Bright Magenta
        'Process_Instrument': "#00FFFF",  # Bright Cyan
        'Nozzle': "#FFFF00",              # Yellow
        'Pipe_Connector': "#800080",      # Purple
        'Unknown': "#FF4444"              # Light Red
    }

    for detection in results:
        detected_type = detection.get('text_type', 'Unknown')
        color = colors.get(detected_type, colors['Unknown'])

        draw.rectangle(detection['bbox'], outline=color, width=3)

        label = f"{detection['text']} ({detection['confidence']:.2f})"
        if detected_type != 'Unknown':
            label += f" [{detected_type}]"

        # Label sits just above the box, on a white backdrop for readability.
        anchor = (detection['bbox'][0], detection['bbox'][1] - 20)
        draw.rectangle(draw.textbbox(anchor, label, font=font), fill="#FFFFFF")
        draw.text(anchor, label, fill=color, font=font)

    return image
495
+
496
def save_annotated_image(image, path, storage):
    """Persist the annotated image as an uncompressed PNG via the storage backend."""
    buffer = io.BytesIO()
    # optimize=False / compress_level=0: favor fidelity and speed over size.
    image.save(buffer, format='PNG', optimize=False, compress_level=0)
    storage.save_file(path, buffer.getvalue())
506
+
507
if __name__ == "__main__":
    from storage import StorageFactory
    import logging

    # Configure logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Initialize storage
    storage = StorageFactory.get_storage()

    # Test file paths
    file_path = "processed_pages/10219-1-DG-BC-00011.01-REV_A_page_1_text.png"
    result_path = "results"

    try:
        # Ensure result directory exists
        os.makedirs(result_path, exist_ok=True)

        # Process the drawing
        logger.info(f"Processing file: {file_path}")
        results, summary = process_drawing(file_path, result_path, storage)

        # Print detailed results
        print("\n=== DETAILED DETECTION RESULTS ===")
        print(f"\nTotal Detections: {summary['total_detections']}")

        print("\nBreakdown by Text Type:")
        print("-" * 50)
        for text_type, stats in summary['by_type'].items():
            if stats['count'] > 0:
                print(f"\n{text_type}:")
                print(f"  Count: {stats['count']}")
                print(f"  Average Confidence: {stats['avg_confidence']:.2f}")
                # BUG FIX: process_drawing's summary has no 'items' key —
                # only the richer summary builder produces it. Guard with
                # .get so this report never raises KeyError.
                items = stats.get('items', [])
                if items:
                    print("  Items:")
                    for item in items:
                        print(f"    - {item['text']} (conf: {item['confidence']:.2f}, source: {item['source']})")

        print("\nBreakdown by OCR Engine:")
        print("-" * 50)
        # BUG FIX: 'by_source' values are stats dicts, not bare counts; the
        # old code printed the dict repr instead of the detection count.
        for source, stats in summary['by_source'].items():
            print(f"{source}: {stats['count']} detections")

        # BUG FIX: 'confidence_ranges' is absent from process_drawing's
        # summary, so indexing it unconditionally crashed every run.
        confidence_ranges = summary.get('confidence_ranges', {})
        if confidence_ranges:
            print("\nConfidence Distribution:")
            print("-" * 50)
            for range_name, count in confidence_ranges.items():
                print(f"{range_name}: {count} detections")

        # Print output paths
        print("\nOutput Files:")
        print("-" * 50)
        print(f"Annotated Image: {results['image_path']}")
        print(f"JSON Results: {results['json_path']}")

    except Exception as e:
        logger.error(f"Error processing file: {e}")
        raise
utils.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import numpy as np
3
+ from contextlib import contextmanager
4
+ from loguru import logger
5
+ from typing import List, Dict, Optional, Tuple, Union
6
+ from detection_schema import BBox
7
+ from storage import StorageInterface
8
+ import cv2
9
+
10
class DebugHandler:
    """Production-grade debugging and performance tracking.

    When ``enabled`` is False every method is a no-op, so an instance can be
    left wired into production code paths at negligible cost.
    """

    def __init__(self, enabled: bool = False, storage: "StorageInterface" = None):
        # enabled: master switch for timing and artifact output.
        # storage: backend used by save_artifact (may be None).
        self.enabled = enabled
        self.storage = storage
        self.metrics = {}          # operation name -> last measured duration (s)
        self._start_time = None    # kept for backward compatibility only

    @contextmanager
    def track_performance(self, operation_name: str):
        """Context manager that records the wall-clock duration of a block.

        BUG FIX: the start time is now captured in a local variable rather
        than only on the shared ``self._start_time``, so nested or
        overlapping ``track_performance`` blocks no longer clobber each
        other's timings.
        """
        start = None
        if self.enabled:
            start = time.perf_counter()
            self._start_time = start  # preserved for any external readers
            logger.debug(f"Starting {operation_name}")

        yield

        if self.enabled and start is not None:
            duration = time.perf_counter() - start
            self.metrics[operation_name] = duration
            logger.debug(f"{operation_name} completed in {duration:.2f}s")

    def save_artifact(self, name: str, data: "Union[bytes, np.ndarray]", extension: str = "png"):
        """Persist a debug artifact under ``debug/<name>.<extension>``.

        ``data`` may be raw bytes or an image array (encoded to ``extension``
        with OpenCV first). No-op unless enabled and a storage backend is set.
        The annotation was corrected: the old ``bytes`` hint contradicted the
        explicit ndarray handling below.
        """
        if self.enabled and self.storage:
            path = f"debug/{name}.{extension}"

            # Arrays must be encoded to an image byte stream before saving.
            if isinstance(data, np.ndarray):
                success, encoded_image = cv2.imencode(f".{extension}", data)
                if not success:
                    logger.error("Failed to encode image for saving.")
                    return
                data = encoded_image.tobytes()

            self.storage.save_file(path, data)
            logger.info(f"Saved debug artifact: {path}")
49
+
50
class CoordinateTransformer:
    """Translate bounding boxes and points between global and ROI-local frames.

    ``roi`` is treated as (x_min, y_min, x_max, y_max); when it is None or
    not 4 elements long, every method is the identity.
    """

    @staticmethod
    def global_to_local_bbox(
        bbox: "Union[BBox, List[BBox]]",
        roi: "Optional[np.ndarray]"
    ) -> "Union[BBox, List[BBox]]":
        """Convert global BBox(es) to ROI-local coordinates.

        Handles both a single BBox and a list of BBoxes. BUG FIX: list input
        now returns a real list — the previous ``map(...)`` returned a
        one-shot iterator, which broke ``len()``, indexing, repeated
        iteration and the declared ``List[BBox]`` return type.
        """
        if roi is None or len(roi) != 4:
            return bbox

        x_min, y_min, _, _ = roi

        def convert(b):
            return BBox(
                xmin=b.xmin - x_min,
                ymin=b.ymin - y_min,
                xmax=b.xmax - x_min,
                ymax=b.ymax - y_min
            )

        return [convert(b) for b in bbox] if isinstance(bbox, list) else convert(bbox)

    @staticmethod
    def local_to_global_bbox(
        bbox: "Union[BBox, List[BBox]]",
        roi: "Optional[np.ndarray]"
    ) -> "Union[BBox, List[BBox]]":
        """Convert ROI-local BBox(es) to global coordinates.

        Handles both a single BBox and a list of BBoxes. Same list-vs-map
        fix as ``global_to_local_bbox``.
        """
        if roi is None or len(roi) != 4:
            return bbox

        x_min, y_min, _, _ = roi

        def convert(b):
            return BBox(
                xmin=b.xmin + x_min,
                ymin=b.ymin + y_min,
                xmax=b.xmax + x_min,
                ymax=b.ymax + y_min
            )

        return [convert(b) for b in bbox] if isinstance(bbox, list) else convert(bbox)

    # Maintain legacy tuple support if needed
    @staticmethod
    def global_to_local(
        bboxes: List[Tuple[int, int, int, int]],
        roi: "Optional[np.ndarray]"
    ) -> List[Tuple[int, int, int, int]]:
        """Legacy tuple version for backward compatibility."""
        if roi is None or len(roi) != 4:
            return bboxes

        x_min, y_min, _, _ = roi
        return [(x1 - x_min, y1 - y_min, x2 - x_min, y2 - y_min)
                for x1, y1, x2, y2 in bboxes]

    @staticmethod
    def local_to_global(
        bboxes: List[Tuple[int, int, int, int]],
        roi: "Optional[np.ndarray]"
    ) -> List[Tuple[int, int, int, int]]:
        """Legacy tuple version for backward compatibility."""
        if roi is None or len(roi) != 4:
            return bboxes

        x_min, y_min, _, _ = roi
        return [(x1 + x_min, y1 + y_min, x2 + x_min, y2 + y_min)
                for x1, y1, x2, y2 in bboxes]

    @staticmethod
    def local_to_global_point(point: Tuple[int, int], roi: "Optional[np.ndarray]") -> Tuple[int, int]:
        """Convert a single point from local to global coordinates."""
        if roi is None or len(roi) != 4:
            return point
        x_min, y_min, _, _ = roi
        return (int(point[0] + x_min), int(point[1] + y_min))