# Hugging Face Spaces status banner captured with this file: "Runtime error".
import base64
import html
import io
import json
import logging
import os
import traceback
from dataclasses import dataclass, asdict
from typing import Dict, List, Any, Optional, Tuple

import folium
import gradio as gr
import numpy as np
import requests
import torch
from geoclip import GeoCLIP, LocationEncoder
from transformers import CLIPTokenizer
class MetacognitiveAssistant:
    """
    Multimodal assistant that pairs GeoCLIP location prediction with a set of
    pluggable "metacognitive" analysis frameworks.

    Predictions are passed around as plain dicts of the form
    ``{"coordinates": (lat, lon), "confidence": float}``; every consumer in
    this class reads them with key access.
    """

    # Marker colours for the first and second prediction; later ones use the default.
    _MARKER_COLORS = ("red", "blue")
    _DEFAULT_MARKER_COLOR = "green"

    def __init__(self, device=None):
        """
        Load the GeoCLIP components and register the analysis frameworks.

        Args:
            device (str, optional): Compute device for the models. Defaults to
                "cuda" when ``torch.cuda.is_available()`` and "cpu" otherwise.
        """
        # Device and model configuration
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        # GeoCLIP components
        self.geoclip_model = GeoCLIP().to(self.device)
        self.tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
        self.location_encoder = LocationEncoder().to(self.device)

        # Caching and logging
        self._cache = {}
        self.logger = self._configure_logger()

        # Analytical frameworks, applied in insertion order by process_query
        self.analytical_frameworks = {
            "multi_perspective": self._multi_perspective_analysis,
            "semantic_excavation": self._semantic_excavation,
            "cross_domain_bridging": self._cross_domain_bridging,
        }
        print(f"MetacognitiveAssistant initialized on {self.device}")

    def _configure_logger(self):
        """
        Return the shared "MetacognitiveAssistant" logger with one console handler.

        ``logging.getLogger`` hands back the same object for the same name, so
        the handler is attached only when the logger has none yet; otherwise
        each new instance would duplicate every log line.

        Returns:
            logging.Logger: Configured logger instance
        """
        logger = logging.getLogger("MetacognitiveAssistant")
        logger.setLevel(logging.DEBUG)

        if not logger.handlers:
            console_handler = logging.StreamHandler()
            console_handler.setLevel(logging.INFO)
            console_handler.setFormatter(
                logging.Formatter(
                    '%(asctime)s - %(name)s - %(levelname)s: %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                )
            )
            logger.addHandler(console_handler)
        return logger

    def _multi_perspective_analysis(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run each perspective over the input and collect the results by name.

        A perspective that raises is logged and left out of the result rather
        than aborting the whole analysis.

        Args:
            input_data (Dict): Input data to analyze
        Returns:
            Dict mapping perspective name to that perspective's result
        """
        perspectives = {
            "quantitative": self._quantitative_perspective,
            "semantic": self._semantic_perspective,
            "systemic": self._systemic_perspective,
        }
        multi_perspective_results = {}
        for name, perspective_func in perspectives.items():
            try:
                multi_perspective_results[name] = perspective_func(input_data)
            except Exception as e:  # best-effort: one failing perspective must not sink the rest
                self.logger.warning(f"Error in {name} perspective: {e}")
        return multi_perspective_results

    def _quantitative_perspective(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Quantitative perspective (placeholder: returns empty containers)."""
        return {
            "metrics": {},
            "statistical_summary": {},
        }

    def _semantic_perspective(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Semantic perspective (placeholder: returns empty containers)."""
        return {
            "implied_narratives": [],
            "conceptual_themes": [],
        }

    def _systemic_perspective(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Systemic perspective (placeholder: returns empty containers)."""
        return {
            "system_interactions": {},
            "emergent_properties": [],
        }

    def _semantic_excavation(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Semantic excavation framework (placeholder: returns empty lists).

        Args:
            input_data (Dict): Input data to excavate
        Returns:
            Dict with semantic insights
        """
        return {
            "causal_narratives": [],
            "hidden_implications": [],
            "generative_principles": [],
        }

    def _cross_domain_bridging(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Cross-domain bridging framework (placeholder: returns empty lists).

        Args:
            input_data (Dict): Input data to analyze
        Returns:
            Dict with cross-domain insights
        """
        return {
            "analogous_patterns": [],
            "domain_bridges": [],
            "transferable_insights": [],
        }

    def process_query(self, message: Dict[str, Any], history: List[Dict[str, Any]]) -> str:
        """
        Answer one chat message and append the framework analysis to it.

        The first attached file, when present, takes priority over the text.
        ``history`` is accepted for the chat callback signature but not used.

        Args:
            message (Dict): Input message with optional "text" and "files"
                keys. A plain string is treated as text-only input.
            history (List): Conversation history
        Returns:
            str: Markdown response, or an error report if processing failed
        """
        try:
            self.logger.info("Processing new query")

            # Tolerate text-only callers that pass a bare string (or nothing).
            if isinstance(message, str):
                message = {"text": message}
            elif message is None:
                message = {}

            files = message.get("files") or []
            text = message.get("text")

            # Route to appropriate processing based on input type
            if files:
                response = self._process_image_input(files[0])
            elif text and text.strip():
                response = self._process_text_input(text)
            else:
                return "Invalid input. Please provide an image or text description."

            # Apply metacognitive analysis frameworks
            analysis_results = {}
            framework_input = {"input": message, "response": response}
            for framework_name, framework_func in self.analytical_frameworks.items():
                try:
                    analysis_results[framework_name] = framework_func(framework_input)
                except Exception as e:  # best-effort: skip a failing framework
                    self.logger.warning(f"Error in {framework_name} analysis: {e}")

            return self._generate_metacognitive_response(response, analysis_results)
        except Exception as e:
            # Top-level boundary for the chat callback: report instead of crashing the UI.
            # NOTE(review): the full traceback is shown to the end user — confirm that is intended.
            error_details = traceback.format_exc()
            self.logger.exception("Query processing error: %s", e)
            return f"🚨 Error processing query:\n```\n{error_details}\n```"

    @staticmethod
    def _format_prediction_lines(predictions: List[Dict], limit: int = 3) -> str:
        """Render up to ``limit`` prediction dicts as numbered Markdown lines."""
        lines = []
        for rank, pred in enumerate(predictions[:limit], start=1):
            lat, lon = pred["coordinates"]
            conf = pred["confidence"] * 100
            lines.append(
                f"**#{rank}:** Coordinates: ({lat:.6f}, {lon:.6f}) - Confidence: {conf:.2f}%\n\n"
            )
        return "".join(lines)

    @staticmethod
    def _embed_map(map_html: str) -> str:
        """
        Wrap rendered map HTML in an iframe, or return "" when there is no map.

        The document is HTML-escaped (quotes included) because it is placed
        inside a single-quoted ``srcdoc`` attribute; unescaped quotes in the
        map markup would terminate the attribute early.
        """
        if not map_html:
            return ""
        escaped = html.escape(map_html, quote=True)
        return f"<iframe srcdoc='{escaped}' width='100%' height='400px' frameborder='0'></iframe>"

    def _build_location_response(self, heading: str, predictions: List[Dict]) -> str:
        """Assemble heading, top prediction lines and the embedded map."""
        body = self._format_prediction_lines(predictions)
        if not body:
            body = "No location predictions available.\n\n"
        return heading + body + self._embed_map(self.generate_static_map(predictions))

    def _process_image_input(self, image_path: str) -> str:
        """
        Process image input using GeoCLIP location predictions.

        Args:
            image_path (str): Path to input image
        Returns:
            str: Markdown listing the top predictions plus an embedded map
        """
        if isinstance(image_path, dict):
            # NOTE(review): some Gradio releases appear to deliver uploads as
            # {"path": ...} dicts rather than plain paths — confirm against the pinned version.
            image_path = image_path.get("path")
        predictions = self.predict_from_image(image_path)
        return self._build_location_response("### Image Location Analysis\n\n", predictions)

    def _process_text_input(self, text_query: str) -> str:
        """
        Process a text description using text-based location predictions.

        Args:
            text_query (str): Input text query
        Returns:
            str: Markdown listing the top predictions plus an embedded map
        """
        predictions = self.predict_from_text(text_query)
        return self._build_location_response(
            f"### Location Predictions for: '{text_query}'\n\n", predictions
        )

    def _generate_metacognitive_response(
        self,
        base_response: str,
        analysis_results: Dict[str, Any]
    ) -> str:
        """
        Append a Markdown summary of the framework results to the response.

        Only non-empty insight values are listed. A framework whose result is
        not a mapping is logged and keeps just its heading.

        Args:
            base_response (str): Original response
            analysis_results (Dict): Results keyed by framework name
        Returns:
            str: ``base_response`` followed by the analysis section
        """
        parts = ["### 🧠 Metacognitive Analysis\n\n"]
        for framework, insights in analysis_results.items():
            parts.append(f"#### {framework.replace('_', ' ').title()} Framework\n")
            try:
                for key, value in insights.items():
                    if value:  # Only include non-empty insights
                        parts.append(f"- **{key.replace('_', ' ').title()}**: {value}\n")
            except (AttributeError, TypeError) as e:
                self.logger.warning(f"Error generating {framework} insights: {e}")
        return base_response + "\n\n" + "".join(parts)

    def predict_from_image(self, image_path, top_k: int = 5) -> List[Dict]:
        """
        Predict candidate locations for an image with GeoCLIP.

        Args:
            image_path: Image path handed to ``GeoCLIP.predict``
            top_k (int): Number of candidates to request
        Returns:
            List of prediction dicts with plain-Python float values
        """
        top_pred_gps, top_pred_prob = self.geoclip_model.predict(image_path, top_k=top_k)
        return [
            {
                # tolist() yields built-in floats, which format and serialise
                # without depending on numpy scalar types.
                "coordinates": tuple(gps.cpu().tolist()),
                "confidence": float(prob),
            }
            for gps, prob in zip(top_pred_gps, top_pred_prob)
        ]

    def predict_from_text(self, text: str, top_k: int = 5) -> List[Dict]:
        """
        Predict candidate locations for a text description.

        The scoring step is not implemented yet, so an uncached call currently
        returns an empty list. Results are memoised per ``(text, top_k)``.

        Args:
            text (str): Free-text location description
            top_k (int): Number of candidates to return
        Returns:
            List of prediction dicts (currently always empty unless cached)
        """
        cache_key = f"text_{text}_{top_k}"
        if cache_key in self._cache:
            return self._cache[cache_key]

        with torch.no_grad():
            # Truncate so over-long descriptions stay within the tokenizer's limit.
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True).to(self.device)
            # TODO: score `inputs` against the GPS gallery and fill `predictions`.

        predictions: List[Dict] = []
        # NOTE(review): the cache is unbounded — consider a size cap if queries vary widely.
        self._cache[cache_key] = predictions
        return predictions

    def generate_static_map(self, predictions: List[Dict]) -> str:
        """
        Render a folium map with one marker per prediction (at most five).

        Args:
            predictions: Prediction dicts; the first one centres the map
        Returns:
            str: Full HTML document for the map, or "" when there are no predictions
        """
        if not predictions:
            return ""

        fmap = folium.Map(location=predictions[0]["coordinates"], zoom_start=5)
        for index, pred in enumerate(predictions[:5]):
            if index < len(self._MARKER_COLORS):
                color = self._MARKER_COLORS[index]
            else:
                color = self._DEFAULT_MARKER_COLOR
            folium.Marker(
                location=pred["coordinates"],
                popup=f"#{index + 1}: {pred['confidence']:.4f}",
                icon=folium.Icon(color=color),
            ).add_to(fmap)
        return fmap.get_root().render()
# Gradio interface
def create_metacognitive_interface():
    """
    Build the Gradio app that exposes a MetacognitiveAssistant as a chat UI.

    A new MetacognitiveAssistant is constructed on every call, and its
    ``process_query`` method is registered as the chat callback.

    Returns:
        gr.Blocks: The assembled (not yet launched) application.
    """
    assistant = MetacognitiveAssistant()
    with gr.Blocks(theme=gr.themes.Default()) as demo:
        gr.Markdown("# 🧠 Metacognitive AI Location Intelligence")
        gr.Markdown("""
        An advanced AI assistant that combines geospatial intelligence
        with deep metacognitive reasoning and analysis.

        - Upload an image or describe a location
        - Receive location predictions and deep analytical insights
        """)
        # avatar_images is documented as file paths or URLs; the emoji strings
        # previously passed here are neither, so the default avatars are used.
        chatbot = gr.Chatbot(
            bubble_full_width=False,
            height=600,
            type="messages",
            layout="panel",
        )
        # Declare the same message format as the chatbot so the two agree.
        gr.ChatInterface(
            fn=assistant.process_query,
            chatbot=chatbot,
            type="messages",
            multimodal=True,
            textbox=gr.MultimodalTextbox(
                placeholder="Describe a location, upload an image...",
                sources=["upload"],
                file_types=["image"],
                show_label=False,
            ),
            autofocus=True,
            submit_btn="Analyze",
            examples=[
                "Describe a tropical beach landscape",
                "Urban cityscape with modern architecture",
            ],
        )
    return demo
def main():
    """Build the interface and serve it on all interfaces at port 7860."""
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    app = create_metacognitive_interface()
    app.launch(**launch_options)


if __name__ == "__main__":
    main()