ravi-vc committed on
Commit
05c0bda
·
verified ·
1 Parent(s): 433c9b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +780 -229
app.py CHANGED
@@ -15,365 +15,916 @@ import io
15
  import base64
16
  import requests
17
  import warnings
 
 
 
 
18
 
19
  # Suppress warnings
20
  warnings.filterwarnings("ignore")
21
 
22
- class ChartAnalyzer:
23
  def __init__(self):
24
- # Load models
25
  self.load_models()
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def load_models(self):
28
- """Load all required models"""
 
 
 
 
 
 
 
29
  try:
30
  print("Loading BLIP model...")
31
- # BLIP for image captioning and understanding
32
  self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
33
  self.blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 
34
 
35
  print("Loading TrOCR model...")
36
- # TrOCR for text extraction
37
  self.trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
38
  self.trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
 
39
 
40
  print("Loading EasyOCR...")
41
- # EasyOCR for backup text extraction
42
- self.ocr_reader = easyocr.Reader(['en'], gpu=False) # Force CPU to avoid GPU issues
43
 
44
- # Florence-2 for advanced understanding (optional)
45
  try:
46
  print("Attempting to load Florence-2...")
47
  self.florence_processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
48
  self.florence_model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
49
- self.florence_available = True
50
  print("Florence-2 loaded successfully!")
51
  except Exception as e:
52
  print(f"Florence-2 not available: {e}")
53
- self.florence_available = False
54
 
55
- print("All models loaded successfully!")
56
 
57
  except Exception as e:
58
  print(f"Error loading models: {e}")
59
  raise e
60
 
61
- def analyze_chart(self, image, analysis_type="comprehensive"):
62
- """Main function to analyze charts"""
63
- if image is None:
64
- return "Please upload an image first."
65
 
66
- results = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  try:
69
  # Convert to PIL Image if needed
70
  if not isinstance(image, Image.Image):
71
  image = Image.fromarray(image).convert('RGB')
72
 
73
- # Basic image understanding with BLIP
74
- results['description'] = self.get_image_description(image)
 
 
 
75
 
76
- # Extract text using multiple methods
77
- results['extracted_text'] = self.extract_text_multi_method(image)
78
 
79
- # Chart type detection
80
- results['chart_type'] = self.detect_chart_type(image, results['description'])
81
 
82
- # Data extraction (if possible)
83
- if analysis_type in ["comprehensive", "data_extraction"]:
84
- results['data_points'] = self.extract_data_points(image, results['chart_type'])
85
 
86
- # Advanced analysis with Florence-2 (if available)
87
- if self.florence_available and analysis_type == "comprehensive":
88
- results['advanced_analysis'] = self.florence_analysis(image)
89
 
90
- return self.format_results(results)
91
 
92
  except Exception as e:
93
- return f"Error analyzing chart: {str(e)}"
 
94
 
95
- def get_image_description(self, image):
96
- """Get image description using BLIP"""
97
  try:
98
- inputs = self.blip_processor(image, return_tensors="pt")
99
- out = self.blip_model.generate(**inputs, max_length=100)
100
- description = self.blip_processor.decode(out[0], skip_special_tokens=True)
101
- return description
102
- except:
103
- return "Unable to generate description"
 
 
 
 
 
 
104
 
105
- def extract_text_multi_method(self, image):
106
- """Extract text using multiple OCR methods"""
107
- extracted_texts = {}
 
 
 
 
 
 
 
108
 
109
- # Method 1: TrOCR
110
- try:
111
- pixel_values = self.trocr_processor(image, return_tensors="pt").pixel_values
112
- generated_ids = self.trocr_model.generate(pixel_values)
113
- trocr_text = self.trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
114
- extracted_texts['TrOCR'] = trocr_text
115
- except:
116
- extracted_texts['TrOCR'] = "Failed"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
- # Method 2: EasyOCR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  try:
120
- # Convert PIL to numpy array
121
- image_np = np.array(image)
122
- ocr_results = self.ocr_reader.readtext(image_np)
123
- easyocr_text = ' '.join([result[1] for result in ocr_results])
124
- extracted_texts['EasyOCR'] = easyocr_text
125
- except:
126
- extracted_texts['EasyOCR'] = "Failed"
127
-
128
- return extracted_texts
129
-
130
- def detect_chart_type(self, image, description):
131
- """Detect chart type based on image analysis"""
132
- description_lower = description.lower()
133
-
134
- chart_keywords = {
135
- 'bar_chart': ['bar', 'column', 'histogram'],
136
- 'line_chart': ['line', 'trend', 'time series'],
137
- 'pie_chart': ['pie', 'circular', 'slice'],
138
- 'scatter_plot': ['scatter', 'correlation', 'points'],
139
- 'area_chart': ['area', 'filled'],
140
- 'box_plot': ['box', 'whisker'],
141
- 'heatmap': ['heat', 'color coded', 'matrix']
142
  }
143
 
144
- for chart_type, keywords in chart_keywords.items():
145
- if any(keyword in description_lower for keyword in keywords):
146
- return chart_type.replace('_', ' ').title()
 
 
 
 
 
 
147
 
148
  return "Unknown Chart Type"
149
 
150
- def extract_data_points(self, image, chart_type):
151
- """Attempt to extract data points (simplified approach)"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  try:
153
- # This is a simplified version - real implementation would be more sophisticated
154
- # Convert to grayscale for analysis
155
  image_np = np.array(image.convert('L'))
156
 
157
- # Basic edge detection
158
- edges = cv2.Canny(image_np, 50, 150)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- # Find contours
161
- contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
162
 
163
- data_info = {
164
- 'contours_found': len(contours),
165
- 'image_dimensions': image_np.shape,
166
- 'note': 'This is a simplified data extraction. Advanced algorithms needed for accurate data point extraction.'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  }
168
 
169
- return data_info
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  except Exception as e:
172
- return f"Data extraction failed: {str(e)}"
 
 
173
 
174
- def florence_analysis(self, image):
175
- """Advanced analysis using Florence-2"""
176
- if not self.florence_available:
177
- return "Florence-2 model not available"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
  try:
180
- # Florence-2 prompts for different tasks
181
- prompts = [
182
- "<OD>", # Object Detection
183
- "<DENSE_REGION_CAPTION>", # Dense captioning
184
- "<OCR_WITH_REGION>" # OCR with regions
185
- ]
186
-
187
- results = {}
188
- for prompt in prompts:
189
- inputs = self.florence_processor(text=prompt, images=image, return_tensors="pt")
190
- generated_ids = self.florence_model.generate(
191
- input_ids=inputs["input_ids"],
192
- pixel_values=inputs["pixel_values"],
193
- max_new_tokens=1024,
194
- num_beams=3
195
- )
196
- generated_text = self.florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
197
- results[prompt] = generated_text
198
 
199
- return results
200
- except:
201
- return "Florence-2 analysis failed"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
- def format_results(self, results):
204
- """Format results for display"""
205
- formatted = "# Chart Analysis Results\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
- if 'description' in results:
208
- formatted += f"## Image Description\n{results['description']}\n\n"
 
 
 
 
209
 
210
- if 'chart_type' in results:
211
- formatted += f"## Chart Type\n{results['chart_type']}\n\n"
 
 
 
 
 
 
 
 
 
 
212
 
213
- if 'extracted_text' in results:
214
- formatted += "## Extracted Text\n"
215
- for method, text in results['extracted_text'].items():
216
- formatted += f"**{method}:** {text}\n\n"
 
 
 
 
 
217
 
218
- if 'data_points' in results:
219
- formatted += f"## Data Analysis\n{results['data_points']}\n\n"
 
 
 
 
 
 
 
 
 
220
 
221
- if 'advanced_analysis' in results:
222
- formatted += f"## Advanced Analysis\n{results['advanced_analysis']}\n\n"
 
 
 
 
 
223
 
224
  return formatted
225
 
226
- # Initialize the analyzer
227
- analyzer = ChartAnalyzer()
228
 
229
- # Create Gradio interface
230
- def analyze_uploaded_chart(image, analysis_type):
231
- return analyzer.analyze_chart(image, analysis_type)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
- # Create the Gradio app
234
- with gr.Blocks(title="Chart Analyzer & Data Extractor", theme=gr.themes.Soft()) as demo:
235
- gr.Markdown("# πŸ“Š Chart Analyzer & Data Extractor")
236
- gr.Markdown("Upload a chart image to extract data and analyze its contents using multiple AI models including BLIP, TrOCR, and Florence-2.")
237
 
238
  with gr.Row():
239
  with gr.Column(scale=1):
240
- gr.Markdown("## πŸ“ Upload Your Chart")
241
 
242
- # Multiple upload options
243
- with gr.Tabs():
244
- with gr.Tab("πŸ“€ Upload Image"):
245
- image_input = gr.Image(
246
- type="pil",
247
- label="Upload Chart Image",
248
- height=400,
249
- sources=["upload", "webcam", "clipboard"],
250
- format="png"
251
- )
252
- gr.Markdown("**Supported formats:** PNG, JPG, JPEG, GIF, BMP")
253
- gr.Markdown("**Max size:** 10MB")
254
-
255
- with gr.Tab("πŸ”— From URL"):
256
- url_input = gr.Textbox(
257
- label="Image URL",
258
- placeholder="https://example.com/chart.png"
259
- )
260
- load_url_btn = gr.Button("Load from URL")
261
 
262
- # Analysis options
263
- gr.Markdown("## βš™οΈ Analysis Settings")
264
  analysis_type = gr.Dropdown(
265
- choices=["basic", "comprehensive", "data_extraction"],
266
  value="comprehensive",
267
  label="Analysis Type",
268
- info="Choose the depth of analysis"
 
 
 
 
 
 
 
269
  )
270
 
271
- with gr.Accordion("Advanced Options", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
  confidence_threshold = gr.Slider(
273
  minimum=0.1,
274
  maximum=1.0,
275
  value=0.5,
276
  label="OCR Confidence Threshold"
277
  )
278
- use_florence = gr.Checkbox(
279
- label="Use Florence-2 (Advanced Analysis)",
280
- value=True
281
- )
282
 
283
  analyze_btn = gr.Button("πŸ” Analyze Chart", variant="primary", size="lg")
284
  clear_btn = gr.Button("πŸ—‘οΈ Clear All", variant="secondary")
285
 
286
  with gr.Column(scale=2):
287
- gr.Markdown("## πŸ“Š Analysis Results")
288
- output = gr.Markdown(
289
- value="Upload an image and click 'Analyze Chart' to see results here.",
290
- label="Results"
291
- )
292
 
293
- # Additional output components
294
- with gr.Accordion("Raw Data Export", open=False):
295
- json_output = gr.JSON(label="Structured Data")
296
- csv_download = gr.File(label="Download CSV", visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- # Function to load image from URL
299
- def load_image_from_url(url):
 
 
 
 
300
  try:
301
- import requests
302
- response = requests.get(url)
303
- response.raise_for_status()
304
- image = Image.open(io.BytesIO(response.content))
305
- return image, "Image loaded successfully!"
 
 
 
 
 
 
 
306
  except Exception as e:
307
- return None, f"Error loading image: {str(e)}"
 
308
 
309
- # Enhanced analysis function
310
- def analyze_uploaded_chart(image, analysis_type, confidence_threshold, use_florence):
311
- if image is None:
312
- return "Please upload an image first.", {}, None
313
 
314
  try:
315
- result = analyzer.analyze_chart(image, analysis_type)
 
 
316
 
317
- # Create structured data for JSON output
318
- structured_data = {
319
- "analysis_type": analysis_type,
320
- "confidence_threshold": confidence_threshold,
321
- "models_used": ["BLIP", "TrOCR", "EasyOCR"],
322
- "timestamp": pd.Timestamp.now().isoformat()
323
- }
324
 
325
- if use_florence and analyzer.florence_available:
326
- structured_data["models_used"].append("Florence-2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
 
328
- return result, structured_data, None
329
 
330
  except Exception as e:
331
- error_msg = f"Error analyzing chart: {str(e)}"
332
- return error_msg, {"error": error_msg}, None
333
-
334
- # Clear function
335
- def clear_all():
336
- return None, "Upload an image and click 'Analyze Chart' to see results here.", {}, None
337
-
338
- # Examples
339
- # gr.Examples(
340
- # examples=[
341
- # ["https://via.placeholder.com/600x400/0066CC/FFFFFF?text=Sample+Bar+Chart", "comprehensive"],
342
- # ["https://via.placeholder.com/600x400/FF6B35/FFFFFF?text=Sample+Line+Chart", "data_extraction"],
343
- # ],
344
- # inputs=[image_input, analysis_type],
345
- # label="Try these examples:"
346
- # )
347
 
348
- # Event handlers
 
 
 
 
 
 
 
 
 
 
 
 
349
  analyze_btn.click(
350
- fn=analyze_uploaded_chart,
351
- inputs=[image_input, analysis_type, confidence_threshold, use_florence],
352
- outputs=[output, json_output, csv_download]
353
  )
354
 
355
- load_url_btn.click(
356
- fn=load_image_from_url,
357
- inputs=[url_input],
358
- outputs=[image_input, output]
359
  )
360
 
361
  clear_btn.click(
362
- fn=clear_all,
363
- outputs=[image_input, output, json_output, csv_download]
364
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
 
366
  if __name__ == "__main__":
367
- print("Starting Chart Analyzer...")
 
 
 
 
 
 
 
 
368
  try:
369
  demo.launch(
370
  server_name="0.0.0.0",
371
  server_port=7860,
372
  share=False,
373
  show_error=True,
374
- quiet=False
375
  )
376
  except Exception as e:
377
- print(f"Error launching app: {e}")
378
- # Fallback launch
379
  demo.launch()
 
15
  import base64
16
  import requests
17
  import warnings
18
+ import json
19
+ from datetime import datetime
20
+ from typing import Dict, List, Any, Optional
21
+ import re
22
 
23
  # Suppress warnings
24
  warnings.filterwarnings("ignore")
25
 
26
+ class StructuredChartAnalyzer:
27
  def __init__(self):
28
+ """Initialize the enhanced chart analyzer with structured output capabilities"""
29
  self.load_models()
30
+ self.prompt_templates = self._init_prompt_templates()
31
 
32
+ def _init_prompt_templates(self) -> Dict[str, str]:
33
+ """Initialize predefined prompt templates for different analysis types"""
34
+ return {
35
+ "comprehensive": "Analyze this chart comprehensively. Identify the chart type, extract all visible text including titles, labels, legends, and data values. Describe the data trends, patterns, and key insights.",
36
+
37
+ "data_extraction": "Focus on extracting numerical data from this chart. Identify all data points, values, categories, and measurements. Pay special attention to axis labels, data series, and quantitative information.",
38
+
39
+ "visual_elements": "Describe the visual elements of this chart including colors, chart type, layout, axes, legends, and overall design. Focus on the structural components.",
40
+
41
+ "trend_analysis": "Analyze the trends and patterns shown in this chart. Identify increasing/decreasing trends, correlations, outliers, and significant data patterns. Provide insights about what the data reveals.",
42
+
43
+ "accessibility": "Describe this chart in a way that would be helpful for visually impaired users. Include all textual content, data relationships, and key findings in a clear, structured manner.",
44
+
45
+ "business_insights": "Analyze this chart from a business perspective. What are the key performance indicators, trends, and actionable insights that can be derived from this data?"
46
+ }
47
+
48
  def load_models(self):
49
+ """Load all required models with better error handling"""
50
+ self.models_loaded = {
51
+ 'blip': False,
52
+ 'trocr': False,
53
+ 'easyocr': False,
54
+ 'florence': False
55
+ }
56
+
57
  try:
58
  print("Loading BLIP model...")
 
59
  self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
60
  self.blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
61
+ self.models_loaded['blip'] = True
62
 
63
  print("Loading TrOCR model...")
 
64
  self.trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
65
  self.trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")
66
+ self.models_loaded['trocr'] = True
67
 
68
  print("Loading EasyOCR...")
69
+ self.ocr_reader = easyocr.Reader(['en'], gpu=False)
70
+ self.models_loaded['easyocr'] = True
71
 
72
+ # Florence-2 for advanced understanding
73
  try:
74
  print("Attempting to load Florence-2...")
75
  self.florence_processor = AutoProcessor.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
76
  self.florence_model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-base", trust_remote_code=True)
77
+ self.models_loaded['florence'] = True
78
  print("Florence-2 loaded successfully!")
79
  except Exception as e:
80
  print(f"Florence-2 not available: {e}")
81
+ self.models_loaded['florence'] = False
82
 
83
+ print("Model loading completed!")
84
 
85
  except Exception as e:
86
  print(f"Error loading models: {e}")
87
  raise e
88
 
89
+ def analyze_chart_with_prompt(self, image, custom_prompt: str = None, analysis_type: str = "comprehensive") -> Dict[str, Any]:
90
+ """
91
+ Main function to analyze charts with structured JSON output
 
92
 
93
+ Args:
94
+ image: PIL Image or numpy array
95
+ custom_prompt: Custom analysis prompt
96
+ analysis_type: Type of analysis to perform
97
+
98
+ Returns:
99
+ Structured dictionary with analysis results
100
+ """
101
+ # Initialize structured output
102
+ structured_output = {
103
+ "metadata": {
104
+ "timestamp": datetime.now().isoformat(),
105
+ "analysis_type": analysis_type,
106
+ "models_used": [model for model, loaded in self.models_loaded.items() if loaded],
107
+ "prompt_used": custom_prompt or self.prompt_templates.get(analysis_type, self.prompt_templates["comprehensive"])
108
+ },
109
+ "image_info": {},
110
+ "text_extraction": {},
111
+ "chart_analysis": {},
112
+ "data_insights": {},
113
+ "quality_metrics": {},
114
+ "errors": []
115
+ }
116
+
117
+ if image is None:
118
+ structured_output["errors"].append("No image provided")
119
+ return structured_output
120
 
121
  try:
122
  # Convert to PIL Image if needed
123
  if not isinstance(image, Image.Image):
124
  image = Image.fromarray(image).convert('RGB')
125
 
126
+ # Extract image metadata
127
+ structured_output["image_info"] = self._extract_image_info(image)
128
+
129
+ # Text extraction with multiple methods
130
+ structured_output["text_extraction"] = self._extract_text_comprehensive(image)
131
 
132
+ # Chart type and structure analysis
133
+ structured_output["chart_analysis"] = self._analyze_chart_structure(image, structured_output["text_extraction"])
134
 
135
+ # Data insights extraction
136
+ structured_output["data_insights"] = self._extract_data_insights(image, structured_output)
137
 
138
+ # Quality assessment
139
+ structured_output["quality_metrics"] = self._assess_quality(image, structured_output)
 
140
 
141
+ # Advanced analysis with Florence-2 if available and requested
142
+ if self.models_loaded['florence'] and analysis_type in ["comprehensive", "advanced"]:
143
+ structured_output["advanced_analysis"] = self._florence_advanced_analysis(image, custom_prompt)
144
 
145
+ return structured_output
146
 
147
  except Exception as e:
148
+ structured_output["errors"].append(f"Analysis error: {str(e)}")
149
+ return structured_output
150
 
151
+ def _extract_image_info(self, image: Image.Image) -> Dict[str, Any]:
152
+ """Extract basic image information"""
153
  try:
154
+ return {
155
+ "dimensions": {
156
+ "width": image.size[0],
157
+ "height": image.size[1]
158
+ },
159
+ "format": image.format or "Unknown",
160
+ "mode": image.mode,
161
+ "has_transparency": image.mode in ("RGBA", "LA"),
162
+ "aspect_ratio": round(image.size[0] / image.size[1], 2)
163
+ }
164
+ except Exception as e:
165
+ return {"error": str(e)}
166
 
167
+ def _extract_text_comprehensive(self, image: Image.Image) -> Dict[str, Any]:
168
+ """Comprehensive text extraction with multiple methods"""
169
+ text_results = {
170
+ "methods_used": [],
171
+ "extracted_texts": {},
172
+ "confidence_scores": {},
173
+ "combined_text": "",
174
+ "detected_numbers": [],
175
+ "detected_labels": []
176
+ }
177
 
178
+ # TrOCR extraction
179
+ if self.models_loaded['trocr']:
180
+ try:
181
+ pixel_values = self.trocr_processor(image, return_tensors="pt").pixel_values
182
+ generated_ids = self.trocr_model.generate(pixel_values, max_length=200)
183
+ trocr_text = self.trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
184
+ text_results["extracted_texts"]["trocr"] = trocr_text
185
+ text_results["methods_used"].append("TrOCR")
186
+ except Exception as e:
187
+ text_results["extracted_texts"]["trocr"] = f"Error: {str(e)}"
188
+
189
+ # EasyOCR extraction
190
+ if self.models_loaded['easyocr']:
191
+ try:
192
+ image_np = np.array(image)
193
+ ocr_results = self.ocr_reader.readtext(image_np)
194
+
195
+ easyocr_data = []
196
+ for bbox, text, confidence in ocr_results:
197
+ easyocr_data.append({
198
+ "text": text,
199
+ "confidence": float(confidence),
200
+ "bbox": bbox
201
+ })
202
+
203
+ easyocr_text = ' '.join([result["text"] for result in easyocr_data])
204
+ text_results["extracted_texts"]["easyocr"] = easyocr_text
205
+ text_results["confidence_scores"]["easyocr"] = easyocr_data
206
+ text_results["methods_used"].append("EasyOCR")
207
+ except Exception as e:
208
+ text_results["extracted_texts"]["easyocr"] = f"Error: {str(e)}"
209
+
210
+ # Combine and analyze text
211
+ all_texts = [text for text in text_results["extracted_texts"].values() if not text.startswith("Error:")]
212
+ text_results["combined_text"] = " ".join(all_texts)
213
+
214
+ # Extract numbers and potential labels
215
+ text_results["detected_numbers"] = self._extract_numbers(text_results["combined_text"])
216
+ text_results["detected_labels"] = self._extract_potential_labels(text_results["combined_text"])
217
 
218
+ return text_results
219
+
220
+ def _extract_numbers(self, text: str) -> List[Dict[str, Any]]:
221
+ """Extract numbers from text with context"""
222
+ number_patterns = [
223
+ r'\d+\.?\d*%', # Percentages
224
+ r'\$\d+\.?\d*', # Currency
225
+ r'\d{1,3}(?:,\d{3})*\.?\d*', # Numbers with commas
226
+ r'\d+\.?\d*' # Simple numbers
227
+ ]
228
+
229
+ numbers = []
230
+ for pattern in number_patterns:
231
+ matches = re.finditer(pattern, text)
232
+ for match in matches:
233
+ numbers.append({
234
+ "value": match.group(),
235
+ "position": match.span(),
236
+ "type": "percentage" if "%" in match.group() else
237
+ "currency" if "$" in match.group() else "number"
238
+ })
239
+
240
+ return numbers
241
+
242
+ def _extract_potential_labels(self, text: str) -> List[str]:
243
+ """Extract potential chart labels and categories"""
244
+ # Simple heuristic to find potential labels
245
+ words = text.split()
246
+ potential_labels = []
247
+
248
+ for word in words:
249
+ # Skip pure numbers
250
+ if re.match(r'^\d+\.?\d*$', word):
251
+ continue
252
+ # Skip very short words
253
+ if len(word) < 2:
254
+ continue
255
+ # Add words that might be labels
256
+ if word.istitle() or word.isupper():
257
+ potential_labels.append(word)
258
+
259
+ return list(set(potential_labels))
260
+
261
+ def _analyze_chart_structure(self, image: Image.Image, text_data: Dict) -> Dict[str, Any]:
262
+ """Analyze chart structure and type"""
263
+ analysis = {
264
+ "chart_type": "unknown",
265
+ "confidence": 0.0,
266
+ "visual_elements": {},
267
+ "layout_analysis": {}
268
+ }
269
+
270
+ # Get image description from BLIP
271
+ if self.models_loaded['blip']:
272
+ try:
273
+ inputs = self.blip_processor(image, return_tensors="pt")
274
+ out = self.blip_model.generate(**inputs, max_length=150)
275
+ description = self.blip_processor.decode(out[0], skip_special_tokens=True)
276
+ analysis["description"] = description
277
+
278
+ # Chart type detection based on description and text
279
+ analysis["chart_type"] = self._detect_chart_type_advanced(description, text_data["combined_text"])
280
+
281
+ except Exception as e:
282
+ analysis["description"] = f"Error: {str(e)}"
283
+
284
+ # Visual analysis
285
  try:
286
+ analysis["visual_elements"] = self._analyze_visual_elements(image)
287
+ analysis["layout_analysis"] = self._analyze_layout(image)
288
+ except Exception as e:
289
+ analysis["visual_elements"] = {"error": str(e)}
290
+
291
+ return analysis
292
+
293
+ def _detect_chart_type_advanced(self, description: str, text: str) -> str:
294
+ """Advanced chart type detection with confidence scoring"""
295
+ combined_text = (description + " " + text).lower()
296
+
297
+ chart_indicators = {
298
+ 'bar_chart': ['bar', 'column', 'histogram', 'vertical bars', 'horizontal bars'],
299
+ 'line_chart': ['line', 'trend', 'time series', 'curve', 'linear'],
300
+ 'pie_chart': ['pie', 'circular', 'slice', 'wedge', 'donut'],
301
+ 'scatter_plot': ['scatter', 'correlation', 'points', 'dots', 'plot'],
302
+ 'area_chart': ['area', 'filled', 'stacked area'],
303
+ 'box_plot': ['box', 'whisker', 'quartile', 'median'],
304
+ 'heatmap': ['heat', 'color coded', 'matrix', 'intensity'],
305
+ 'gauge': ['gauge', 'dial', 'speedometer', 'meter'],
306
+ 'funnel': ['funnel', 'conversion', 'stages'],
307
+ 'radar': ['radar', 'spider', 'web chart']
308
  }
309
 
310
+ scores = {}
311
+ for chart_type, keywords in chart_indicators.items():
312
+ score = sum(1 for keyword in keywords if keyword in combined_text)
313
+ if score > 0:
314
+ scores[chart_type] = score
315
+
316
+ if scores:
317
+ best_match = max(scores.items(), key=lambda x: x[1])
318
+ return best_match[0].replace('_', ' ').title()
319
 
320
  return "Unknown Chart Type"
321
 
322
+ def _analyze_visual_elements(self, image: Image.Image) -> Dict[str, Any]:
323
+ """Analyze visual elements of the chart"""
324
+ try:
325
+ image_np = np.array(image)
326
+
327
+ # Color analysis
328
+ colors = image_np.reshape(-1, 3)
329
+ unique_colors = np.unique(colors, axis=0)
330
+ dominant_colors = self._get_dominant_colors(colors)
331
+
332
+ # Edge analysis
333
+ gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)
334
+ edges = cv2.Canny(gray, 50, 150)
335
+
336
+ return {
337
+ "color_count": len(unique_colors),
338
+ "dominant_colors": dominant_colors,
339
+ "edge_density": np.sum(edges > 0) / edges.size,
340
+ "brightness": float(np.mean(gray) / 255),
341
+ "contrast": float(np.std(gray) / 255)
342
+ }
343
+ except Exception as e:
344
+ return {"error": str(e)}
345
+
346
+ def _get_dominant_colors(self, colors: np.ndarray, n_colors: int = 5) -> List[List[int]]:
347
+ """Get dominant colors from image"""
348
+ try:
349
+ from sklearn.cluster import KMeans
350
+ kmeans = KMeans(n_clusters=min(n_colors, len(np.unique(colors, axis=0))), random_state=42)
351
+ kmeans.fit(colors)
352
+ return [color.astype(int).tolist() for color in kmeans.cluster_centers_]
353
+ except:
354
+ # Fallback without sklearn
355
+ unique_colors = np.unique(colors, axis=0)
356
+ return unique_colors[:n_colors].tolist()
357
+
358
+ def _analyze_layout(self, image: Image.Image) -> Dict[str, Any]:
359
+ """Analyze chart layout and structure"""
360
  try:
 
 
361
  image_np = np.array(image.convert('L'))
362
 
363
+ # Find potential axes
364
+ h_lines = self._detect_horizontal_lines(image_np)
365
+ v_lines = self._detect_vertical_lines(image_np)
366
+
367
+ return {
368
+ "horizontal_lines": len(h_lines),
369
+ "vertical_lines": len(v_lines),
370
+ "has_grid": len(h_lines) > 2 and len(v_lines) > 2,
371
+ "image_regions": self._identify_regions(image_np)
372
+ }
373
+ except Exception as e:
374
+ return {"error": str(e)}
375
+
376
+ def _detect_horizontal_lines(self, gray_image: np.ndarray) -> List:
377
+ """Detect horizontal lines in image"""
378
+ horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 1))
379
+ detected_lines = cv2.morphologyEx(gray_image, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
380
+ cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
381
+ return cnts[0] if len(cnts) == 2 else cnts[1]
382
+
383
+ def _detect_vertical_lines(self, gray_image: np.ndarray) -> List:
384
+ """Detect vertical lines in image"""
385
+ vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 25))
386
+ detected_lines = cv2.morphologyEx(gray_image, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
387
+ cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
388
+ return cnts[0] if len(cnts) == 2 else cnts[1]
389
+
390
+ def _identify_regions(self, image: np.ndarray) -> Dict[str, Any]:
391
+ """Identify different regions of the chart"""
392
+ h, w = image.shape
393
+ return {
394
+ "title_region": {"y": 0, "height": h // 10},
395
+ "chart_area": {"y": h // 10, "height": int(h * 0.7)},
396
+ "legend_area": {"y": int(h * 0.8), "height": h // 5},
397
+ "total_dimensions": {"width": w, "height": h}
398
+ }
399
+
400
+ def _extract_data_insights(self, image: Image.Image, analysis_data: Dict) -> Dict[str, Any]:
401
+ """Extract data insights and patterns"""
402
+ insights = {
403
+ "numerical_data": [],
404
+ "categories": [],
405
+ "trends": [],
406
+ "outliers": [],
407
+ "summary_statistics": {}
408
+ }
409
+
410
+ try:
411
+ # Extract numerical values
412
+ numbers = analysis_data["text_extraction"]["detected_numbers"]
413
+ numerical_values = []
414
+
415
+ for num_data in numbers:
416
+ if num_data["type"] == "number":
417
+ try:
418
+ # Clean and convert number
419
+ clean_num = re.sub(r'[,\s]', '', num_data["value"])
420
+ value = float(clean_num)
421
+ numerical_values.append(value)
422
+ except:
423
+ continue
424
+
425
+ if numerical_values:
426
+ insights["numerical_data"] = numerical_values
427
+ insights["summary_statistics"] = {
428
+ "count": len(numerical_values),
429
+ "min": min(numerical_values),
430
+ "max": max(numerical_values),
431
+ "mean": np.mean(numerical_values),
432
+ "median": np.median(numerical_values),
433
+ "std": np.std(numerical_values) if len(numerical_values) > 1 else 0
434
+ }
435
 
436
+ # Categories from labels
437
+ insights["categories"] = analysis_data["text_extraction"]["detected_labels"]
438
 
439
+ return insights
440
+
441
+ except Exception as e:
442
+ insights["error"] = str(e)
443
+ return insights
444
+
445
+ def _assess_quality(self, image: Image.Image, analysis_data: Dict) -> Dict[str, Any]:
446
+ """Assess the quality and readability of the chart"""
447
+ quality = {
448
+ "overall_score": 0.0,
449
+ "readability": {},
450
+ "completeness": {},
451
+ "technical_quality": {}
452
+ }
453
+
454
+ try:
455
+ # Text extraction quality
456
+ text_methods = len(analysis_data["text_extraction"]["methods_used"])
457
+ extracted_text_length = len(analysis_data["text_extraction"]["combined_text"])
458
+
459
+ quality["readability"] = {
460
+ "text_extraction_methods": text_methods,
461
+ "text_length": extracted_text_length,
462
+ "numbers_detected": len(analysis_data["text_extraction"]["detected_numbers"]),
463
+ "labels_detected": len(analysis_data["text_extraction"]["detected_labels"])
464
+ }
465
+
466
+ # Completeness assessment
467
+ has_title = "title" in analysis_data["text_extraction"]["combined_text"].lower()
468
+ has_numbers = len(analysis_data["text_extraction"]["detected_numbers"]) > 0
469
+ has_labels = len(analysis_data["text_extraction"]["detected_labels"]) > 0
470
+
471
+ quality["completeness"] = {
472
+ "has_title": has_title,
473
+ "has_numerical_data": has_numbers,
474
+ "has_labels": has_labels,
475
+ "chart_type_identified": analysis_data["chart_analysis"]["chart_type"] != "Unknown Chart Type"
476
  }
477
 
478
+ # Technical quality
479
+ visual_elements = analysis_data["chart_analysis"].get("visual_elements", {})
480
+ if not visual_elements.get("error"):
481
+ quality["technical_quality"] = {
482
+ "image_brightness": visual_elements.get("brightness", 0),
483
+ "image_contrast": visual_elements.get("contrast", 0),
484
+ "color_diversity": visual_elements.get("color_count", 0),
485
+ "edge_clarity": visual_elements.get("edge_density", 0)
486
+ }
487
+
488
+ # Calculate overall score
489
+ completeness_score = sum(quality["completeness"].values()) / len(quality["completeness"])
490
+ readability_score = min(1.0, (extracted_text_length / 100) * 0.5 + (text_methods / 2) * 0.5)
491
+
492
+ quality["overall_score"] = (completeness_score * 0.6 + readability_score * 0.4)
493
 
494
  except Exception as e:
495
+ quality["error"] = str(e)
496
+
497
+ return quality
498
 
499
+ def _florence_advanced_analysis(self, image: Image.Image, custom_prompt: str = None) -> Dict[str, Any]:
500
+ """Advanced analysis using Florence-2 with custom prompts"""
501
+ if not self.models_loaded['florence']:
502
+ return {"error": "Florence-2 model not available"}
503
+
504
+ florence_results = {}
505
+
506
+ # Standard Florence-2 tasks
507
+ florence_tasks = {
508
+ "object_detection": "<OD>",
509
+ "dense_caption": "<DENSE_REGION_CAPTION>",
510
+ "ocr_with_regions": "<OCR_WITH_REGION>",
511
+ "detailed_caption": "<MORE_DETAILED_CAPTION>"
512
+ }
513
+
514
+ # Add custom prompt if provided
515
+ if custom_prompt:
516
+ florence_tasks["custom_analysis"] = f"<CAPTION>{custom_prompt}"
517
 
518
  try:
519
+ for task_name, prompt in florence_tasks.items():
520
+ try:
521
+ inputs = self.florence_processor(text=prompt, images=image, return_tensors="pt")
522
+ generated_ids = self.florence_model.generate(
523
+ input_ids=inputs["input_ids"],
524
+ pixel_values=inputs["pixel_values"],
525
+ max_new_tokens=1024,
526
+ num_beams=3,
527
+ do_sample=False
528
+ )
529
+ generated_text = self.florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
530
+ florence_results[task_name] = self._parse_florence_output(generated_text, prompt)
531
+ except Exception as e:
532
+ florence_results[task_name] = {"error": str(e)}
 
 
 
 
533
 
534
+ return florence_results
535
+
536
+ except Exception as e:
537
+ return {"error": f"Florence-2 analysis failed: {str(e)}"}
538
+
539
+ def _parse_florence_output(self, output: str, prompt: str) -> Dict[str, Any]:
540
+ """Parse Florence-2 output into structured format"""
541
+ try:
542
+ # Remove the prompt from the output
543
+ if prompt in output:
544
+ parsed_output = output.replace(prompt, "").strip()
545
+ else:
546
+ parsed_output = output.strip()
547
+
548
+ # Try to parse as JSON if it looks like structured data
549
+ if parsed_output.startswith('{') and parsed_output.endswith('}'):
550
+ try:
551
+ return json.loads(parsed_output)
552
+ except:
553
+ pass
554
+
555
+ return {"raw_output": parsed_output}
556
+
557
+ except Exception as e:
558
+ return {"error": str(e), "raw_output": output}
559
 
560
+ def format_results_for_display(self, structured_output: Dict[str, Any]) -> str:
561
+ """Format structured results for human-readable display"""
562
+ formatted = "# πŸ“Š Enhanced Chart Analysis Results\n\n"
563
+
564
+ # Metadata
565
+ metadata = structured_output.get("metadata", {})
566
+ formatted += f"**Analysis Type:** {metadata.get('analysis_type', 'Unknown')}\n"
567
+ formatted += f"**Timestamp:** {metadata.get('timestamp', 'Unknown')}\n"
568
+ formatted += f"**Models Used:** {', '.join(metadata.get('models_used', []))}\n\n"
569
+
570
+ # Image Info
571
+ image_info = structured_output.get("image_info", {})
572
+ if not image_info.get("error"):
573
+ dims = image_info.get("dimensions", {})
574
+ formatted += f"## πŸ–ΌοΈ Image Information\n"
575
+ formatted += f"**Dimensions:** {dims.get('width', 'Unknown')} x {dims.get('height', 'Unknown')}\n"
576
+ formatted += f"**Format:** {image_info.get('format', 'Unknown')}\n"
577
+ formatted += f"**Aspect Ratio:** {image_info.get('aspect_ratio', 'Unknown')}\n\n"
578
 
579
+ # Chart Analysis
580
+ chart_analysis = structured_output.get("chart_analysis", {})
581
+ formatted += f"## πŸ“ˆ Chart Analysis\n"
582
+ formatted += f"**Chart Type:** {chart_analysis.get('chart_type', 'Unknown')}\n"
583
+ if chart_analysis.get("description"):
584
+ formatted += f"**Description:** {chart_analysis['description']}\n\n"
585
 
586
+ # Text Extraction
587
+ text_extraction = structured_output.get("text_extraction", {})
588
+ if text_extraction.get("combined_text"):
589
+ formatted += f"## πŸ“ Extracted Text\n"
590
+ formatted += f"**Methods Used:** {', '.join(text_extraction.get('methods_used', []))}\n"
591
+ formatted += f"**Combined Text:** {text_extraction['combined_text']}\n"
592
+
593
+ if text_extraction.get("detected_numbers"):
594
+ formatted += f"**Numbers Found:** {len(text_extraction['detected_numbers'])}\n"
595
+
596
+ if text_extraction.get("detected_labels"):
597
+ formatted += f"**Labels Found:** {', '.join(text_extraction['detected_labels'])}\n\n"
598
 
599
+ # Data Insights
600
+ data_insights = structured_output.get("data_insights", {})
601
+ if data_insights.get("summary_statistics"):
602
+ stats = data_insights["summary_statistics"]
603
+ formatted += f"## πŸ“Š Data Insights\n"
604
+ formatted += f"**Data Points:** {stats.get('count', 0)}\n"
605
+ formatted += f"**Range:** {stats.get('min', 'N/A')} - {stats.get('max', 'N/A')}\n"
606
+ formatted += f"**Average:** {stats.get('mean', 'N/A'):.2f}\n"
607
+ formatted += f"**Median:** {stats.get('median', 'N/A'):.2f}\n\n"
608
 
609
+ # Quality Assessment
610
+ quality = structured_output.get("quality_metrics", {})
611
+ if quality.get("overall_score") is not None:
612
+ formatted += f"## ⭐ Quality Assessment\n"
613
+ formatted += f"**Overall Score:** {quality['overall_score']:.2f}/1.0\n"
614
+
615
+ completeness = quality.get("completeness", {})
616
+ if completeness:
617
+ formatted += f"**Has Title:** {'Yes' if completeness.get('has_title') else 'No'}\n"
618
+ formatted += f"**Has Data:** {'Yes' if completeness.get('has_numerical_data') else 'No'}\n"
619
+ formatted += f"**Chart Type Identified:** {'Yes' if completeness.get('chart_type_identified') else 'No'}\n\n"
620
 
621
+ # Errors
622
+ errors = structured_output.get("errors", [])
623
+ if errors:
624
+ formatted += f"## ⚠️ Errors\n"
625
+ for error in errors:
626
+ formatted += f"- {error}\n"
627
+ formatted += "\n"
628
 
629
  return formatted
630
 
631
# Initialize the enhanced analyzer once at module import; every Gradio callback
# below shares this single instance (model loading happens inside its __init__).
analyzer = StructuredChartAnalyzer()
633
 
634
def analyze_with_structured_output(image, analysis_type, custom_prompt, include_florence):
    """Run the structured chart analysis and package results for the UI.

    Args:
        image: PIL image of the chart (may be None; passed through to the analyzer).
        analysis_type: Key into the analyzer's prompt templates.
        custom_prompt: Optional free-form prompt overriding the template.
        include_florence: Kept for interface compatibility (currently unused).

    Returns:
        Tuple of (markdown display string, raw structured dict, CSV text or None).
    """
    # FIX: Gradio may hand us None for an untouched textbox; the old
    # custom_prompt.strip() raised AttributeError. Whitespace-only -> no prompt.
    prompt_to_use = (custom_prompt or "").strip() or None

    # Get structured output
    structured_result = analyzer.analyze_chart_with_prompt(
        image,
        custom_prompt=prompt_to_use,
        analysis_type=analysis_type,
    )

    # Format for display
    formatted_display = analyzer.format_results_for_display(structured_result)

    # Create CSV data if possible
    csv_data = None
    data_insights = structured_result.get("data_insights", {})
    values = data_insights.get("numerical_data")
    if values:
        # FIX: the old slice only trimmed categories; when there were fewer
        # categories than values, pandas raised a length-mismatch ValueError.
        # Pad with empty strings so both columns always have equal length.
        categories = list(data_insights.get("categories", []))[:len(values)]
        categories += [""] * (len(values) - len(categories))

        df = pd.DataFrame({"Values": values, "Categories": categories})
        csv_buffer = io.StringIO()
        df.to_csv(csv_buffer, index=False)
        csv_data = csv_buffer.getvalue()

    return formatted_display, structured_result, csv_data
664
 
665
# Enhanced Gradio interface
with gr.Blocks(title="Enhanced Chart Analyzer with Structured Output", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📊 Enhanced Chart Analyzer with Structured JSON Output")
    gr.Markdown("Upload a chart image and get comprehensive analysis with structured data output. Supports custom prompts and multiple AI models.")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## 🔍 Analysis Configuration")

            image_input = gr.Image(
                type="pil",
                label="Upload Chart Image",
                height=300
            )

            analysis_type = gr.Dropdown(
                choices=list(analyzer.prompt_templates.keys()),
                value="comprehensive",
                label="Analysis Type",
                info="Choose predefined analysis type or use custom prompt"
            )

            custom_prompt = gr.Textbox(
                label="Custom Analysis Prompt",
                placeholder="Enter your custom analysis instructions here...",
                lines=3,
                info="Optional: Override the selected analysis type with a custom prompt"
            )

            with gr.Accordion("Prompt Templates", open=False):
                template_display = gr.Markdown()

            def update_template_display(analysis_type):
                """Render the prompt template for the currently selected analysis type."""
                return f"**{analysis_type.title()} Template:**\n\n{analyzer.prompt_templates.get(analysis_type, 'No template available')}"

            analysis_type.change(update_template_display, inputs=[analysis_type], outputs=[template_display])

            with gr.Accordion("Advanced Settings", open=False):
                include_florence = gr.Checkbox(
                    label="Use Florence-2 Advanced Analysis",
                    value=True,
                    info="Include advanced computer vision analysis (if model available)"
                )

                confidence_threshold = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.5,
                    label="OCR Confidence Threshold"
                )

            analyze_btn = gr.Button("🔍 Analyze Chart", variant="primary", size="lg")
            clear_btn = gr.Button("🗑️ Clear All", variant="secondary")

        with gr.Column(scale=2):
            gr.Markdown("## 📋 Analysis Results")

            with gr.Tabs():
                with gr.Tab("📊 Formatted Results"):
                    formatted_output = gr.Markdown(
                        value="Upload an image and click 'Analyze Chart' to see results here.",
                        label="Analysis Results"
                    )

                with gr.Tab("🔧 Structured JSON"):
                    json_output = gr.JSON(
                        label="Complete Structured Output",
                        show_label=True
                    )

                with gr.Tab("📈 Data Export"):
                    gr.Markdown("### Export Options")

                    with gr.Row():
                        # FIX: these were created with visible=False and never
                        # un-hidden, so generated files could not be downloaded.
                        json_download = gr.File(label="Download JSON Results")
                        csv_download = gr.File(label="Download CSV Data")

                    export_btn = gr.Button("📥 Generate Export Files")
                    export_status = gr.Textbox(label="Export Status", interactive=False)

    # Example section
    gr.Markdown("## 🎯 Example Prompts")

    example_prompts = [
        ["What are the main trends shown in this chart?", "trend_analysis"],
        ["Extract all numerical data points and their labels", "data_extraction"],
        ["Describe this chart for accessibility purposes", "accessibility"],
        ["What business insights can be derived from this data?", "business_insights"],
        ["Analyze the performance metrics shown in this dashboard", "comprehensive"]
    ]

    gr.Examples(
        examples=example_prompts,
        inputs=[custom_prompt, analysis_type],
        label="Try these example prompts:"
    )

    # Event handlers
    def analyze_chart_comprehensive(image, analysis_type, custom_prompt, include_florence, confidence_threshold):
        """Main analysis callback.

        Returns:
            (markdown display string, structured result dict, status message).
        """
        if image is None:
            return "Please upload an image first.", {}, "No data to export"

        try:
            # FIX: custom_prompt may be None; the old .strip() call raised
            # AttributeError before any analysis ran.
            prompt = (custom_prompt or "").strip() or None
            structured_result = analyzer.analyze_chart_with_prompt(
                image,
                custom_prompt=prompt,
                analysis_type=analysis_type
            )

            formatted_display = analyzer.format_results_for_display(structured_result)
            return formatted_display, structured_result, "✅ Analysis completed successfully"

        except Exception as e:
            error_msg = f"❌ Analysis failed: {str(e)}"
            return error_msg, {"error": str(e)}, error_msg

    def generate_export_files(json_data):
        """Write the analysis JSON (and CSV when numeric data exists) to temp files.

        FIX: gr.File outputs require file *paths*; the old code returned raw
        strings (and built a dead io.StringIO), so downloads never worked.
        """
        import tempfile  # local import: only needed for export

        if not json_data or json_data.get("error"):
            return None, None, "❌ No valid data to export"

        try:
            # JSON export: always available.
            json_str = json.dumps(json_data, indent=2, default=str)
            with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False, encoding="utf-8") as jf:
                jf.write(json_str)
                json_path = jf.name

            # CSV export: only when numerical data was extracted.
            csv_path = None
            data_insights = json_data.get("data_insights", {})
            if data_insights.get("numerical_data"):
                values = data_insights["numerical_data"]
                detected_numbers = json_data.get("text_extraction", {}).get("detected_numbers", [])

                if detected_numbers:
                    # Prefer the richer per-number metadata when available.
                    export_df = pd.DataFrame([
                        {
                            "Value": num_data.get("value", ""),
                            "Type": num_data.get("type", ""),
                            "Position": str(num_data.get("position", "")),
                        }
                        for num_data in detected_numbers
                    ])
                else:
                    # Fallback: plain values, with categories padded/trimmed
                    # to match so DataFrame construction cannot fail.
                    df_data = {"Numerical_Values": values}
                    categories = list(data_insights.get("categories", []))[:len(values)]
                    if categories:
                        categories += [""] * (len(values) - len(categories))
                        df_data["Categories"] = categories
                    export_df = pd.DataFrame(df_data)

                with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False, encoding="utf-8", newline="") as cf:
                    export_df.to_csv(cf, index=False)
                    csv_path = cf.name

            return json_path, csv_path, "✅ Export files generated successfully"

        except Exception as e:
            return None, None, f"❌ Export failed: {str(e)}"

    def clear_all_inputs():
        """Reset every input/output component to its initial state."""
        return (
            None,  # image
            "Upload an image and click 'Analyze Chart' to see results here.",  # formatted output
            {},  # json output
            "No data to export",  # export status
            "",  # custom prompt
            None,  # json download
            None  # csv download
        )

    # Connect event handlers.
    # FIX: export_status was listed twice as an output while the callback
    # returned four values; Gradio rejects duplicate output components.
    analyze_btn.click(
        fn=analyze_chart_comprehensive,
        inputs=[image_input, analysis_type, custom_prompt, include_florence, confidence_threshold],
        outputs=[formatted_output, json_output, export_status]
    )

    export_btn.click(
        fn=generate_export_files,
        inputs=[json_output],
        outputs=[json_download, csv_download, export_status]
    )

    clear_btn.click(
        fn=clear_all_inputs,
        outputs=[image_input, formatted_output, json_output, export_status, custom_prompt, json_download, csv_download]
    )

    # Initialize template display so the accordion is not empty on first load.
    template_display.value = update_template_display("comprehensive")
+
883
# Additional helper functions for advanced features
def load_image_from_url(url):
    """Fetch an image over HTTP.

    Args:
        url: Direct link to an image file.

    Returns:
        (PIL image, status message) on success, (None, error message) on failure.
    """
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()  # surface HTTP errors as exceptions
        return Image.open(io.BytesIO(resp.content)), "✅ Image loaded successfully from URL"
    except Exception as e:
        return None, f"❌ Failed to load image: {str(e)}"
893
+
894
# Add URL loading capability (re-enters the Blocks context to append components)
with demo:
    with gr.Accordion("🌐 Load from URL", open=False):
        url_box = gr.Textbox(
            label="Image URL",
            placeholder="https://example.com/chart.png"
        )
        fetch_url_btn = gr.Button("📥 Load from URL")

        # On click, fetch the image and drop it into the main image input;
        # the status textbox doubles as the load-result indicator.
        fetch_url_btn.click(
            fn=load_image_from_url,
            inputs=[url_box],
            outputs=[image_input, export_status]
        )
908
 
909
if __name__ == "__main__":
    # Startup banner: advertise what this build supports.
    print("🚀 Starting Enhanced Chart Analyzer...")
    print("📊 Features:")
    for feature in (
        " - Structured JSON output",
        " - Custom analysis prompts",
        " - Multiple AI models (BLIP, TrOCR, EasyOCR, Florence-2)",
        " - Data export capabilities",
        " - Quality assessment",
        " - Advanced visual analysis",
    ):
        print(feature)

    # Preferred launch config; fall back to Gradio defaults if it fails
    # (e.g. the port is taken or the bind address is unavailable).
    try:
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_error=True,
            debug=True
        )
    except Exception as e:
        print(f"❌ Error launching app: {e}")
        print("🔄 Trying fallback launch...")
        demo.launch()