Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import anthropic | |
| import PyPDF2 | |
| import pandas as pd | |
| import numpy as np | |
| import io | |
| import os | |
| import json | |
| import zipfile | |
| import tempfile | |
| from typing import Dict, List, Tuple, Union, Optional | |
| import re | |
| from pathlib import Path | |
| import openpyxl | |
| from dataclasses import dataclass | |
| from enum import Enum | |
| from docx import Document | |
| from docx.shared import Inches, Pt, RGBColor | |
| from docx.enum.text import WD_ALIGN_PARAGRAPH | |
| from reportlab.lib import colors | |
| from reportlab.lib.pagesizes import letter, A4 | |
| from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak | |
| from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle | |
| from reportlab.lib.units import inch | |
| from reportlab.pdfbase import pdfmetrics | |
| from reportlab.pdfbase.ttfonts import TTFont | |
| import matplotlib.pyplot as plt | |
| from datetime import datetime | |
# Configuration for HuggingFace: turn off Gradio usage analytics
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
# Initialize the shared Anthropic client (no explicit arguments are passed;
# presumably the key comes from the ANTHROPIC_API_KEY secret — TODO confirm)
client = anthropic.Anthropic()
# Translation system: UI strings keyed first by language code, then by message id.
# Every language must define the same set of message ids.
TRANSLATIONS = {
    "en": {
        "title": "🧬 Comparative Analyzer of Biotechnological Models",
        "subtitle": "Specialized in comparative analysis of mathematical model fitting results",
        "upload_files": "📁 Upload fitting results (CSV/Excel)",
        "select_model": "🤖 Claude Model",
        "select_language": "🌐 Language",
        "select_theme": "🎨 Theme",
        "detail_level": "📋 Analysis detail level",
        "detailed": "Detailed",
        "summarized": "Summarized",
        "analyze_button": "🚀 Analyze and Compare Models",
        "export_format": "📄 Export format",
        "export_button": "💾 Export Report",
        "comparative_analysis": "📊 Comparative Analysis",
        "implementation_code": "💻 Implementation Code",
        "data_format": "📋 Expected data format",
        "examples": "📚 Analysis examples",
        "light": "Light",
        "dark": "Dark",
        "best_for": "Best for",
        "loading": "Loading...",
        "error_no_api": "Please configure ANTHROPIC_API_KEY in HuggingFace Space secrets",
        "error_no_files": "Please upload fitting result files to analyze",
        "report_exported": "Report exported successfully as",
        "specialized_in": "🎯 Specialized in:",
        "metrics_analyzed": "📊 Analyzed metrics:",
        "what_analyzes": "🔍 What it specifically analyzes:",
        "tips": "💡 Tips for better results:",
        "additional_specs": "📝 Additional specifications for analysis",
        "additional_specs_placeholder": "Add any specific requirements or focus areas for the analysis...",
    },
    "es": {
        "title": "🧬 Analizador Comparativo de Modelos Biotecnológicos",
        "subtitle": "Especializado en análisis comparativo de resultados de ajuste de modelos matemáticos",
        "upload_files": "📁 Subir resultados de ajuste (CSV/Excel)",
        "select_model": "🤖 Modelo Claude",
        "select_language": "🌐 Idioma",
        "select_theme": "🎨 Tema",
        "detail_level": "📋 Nivel de detalle del análisis",
        "detailed": "Detallado",
        "summarized": "Resumido",
        "analyze_button": "🚀 Analizar y Comparar Modelos",
        "export_format": "📄 Formato de exportación",
        "export_button": "💾 Exportar Reporte",
        "comparative_analysis": "📊 Análisis Comparativo",
        "implementation_code": "💻 Código de Implementación",
        "data_format": "📋 Formato de datos esperado",
        "examples": "📚 Ejemplos de análisis",
        "light": "Claro",
        "dark": "Oscuro",
        "best_for": "Mejor para",
        "loading": "Cargando...",
        "error_no_api": "Por favor configura ANTHROPIC_API_KEY en los secretos del Space",
        "error_no_files": "Por favor sube archivos con resultados de ajuste para analizar",
        "report_exported": "Reporte exportado exitosamente como",
        "specialized_in": "🎯 Especializado en:",
        "metrics_analyzed": "📊 Métricas analizadas:",
        "what_analyzes": "🔍 Qué analiza específicamente:",
        "tips": "💡 Tips para mejores resultados:",
        "additional_specs": "📝 Especificaciones adicionales para el análisis",
        "additional_specs_placeholder": "Agregue cualquier requerimiento específico o áreas de enfoque para el análisis...",
    },
    "fr": {
        "title": "🧬 Analyseur Comparatif de Modèles Biotechnologiques",
        "subtitle": "Spécialisé dans l'analyse comparative des résultats d'ajustement",
        "upload_files": "📁 Télécharger les résultats (CSV/Excel)",
        "select_model": "🤖 Modèle Claude",
        "select_language": "🌐 Langue",
        "select_theme": "🎨 Thème",
        "detail_level": "📋 Niveau de détail",
        "detailed": "Détaillé",
        "summarized": "Résumé",
        "analyze_button": "🚀 Analyser et Comparer",
        "export_format": "📄 Format d'export",
        "export_button": "💾 Exporter le Rapport",
        "comparative_analysis": "📊 Analyse Comparative",
        "implementation_code": "💻 Code d'Implémentation",
        "data_format": "📋 Format de données attendu",
        "examples": "📚 Exemples d'analyse",
        "light": "Clair",
        "dark": "Sombre",
        "best_for": "Meilleur pour",
        "loading": "Chargement...",
        "error_no_api": "Veuillez configurer ANTHROPIC_API_KEY",
        "error_no_files": "Veuillez télécharger des fichiers à analyser",
        "report_exported": "Rapport exporté avec succès comme",
        "specialized_in": "🎯 Spécialisé dans:",
        "metrics_analyzed": "📊 Métriques analysées:",
        "what_analyzes": "🔍 Ce qu'il analyse spécifiquement:",
        "tips": "💡 Conseils pour de meilleurs résultats:",
        "additional_specs": "📝 Spécifications supplémentaires pour l'analyse",
        "additional_specs_placeholder": "Ajoutez des exigences spécifiques ou des domaines d'intérêt pour l'analyse...",
    },
    "de": {
        "title": "🧬 Vergleichender Analysator für Biotechnologische Modelle",
        "subtitle": "Spezialisiert auf vergleichende Analyse von Modellanpassungsergebnissen",
        "upload_files": "📁 Ergebnisse hochladen (CSV/Excel)",
        "select_model": "🤖 Claude Modell",
        "select_language": "🌐 Sprache",
        "select_theme": "🎨 Thema",
        "detail_level": "📋 Detailgrad der Analyse",
        "detailed": "Detailliert",
        "summarized": "Zusammengefasst",
        "analyze_button": "🚀 Analysieren und Vergleichen",
        "export_format": "📄 Exportformat",
        "export_button": "💾 Bericht Exportieren",
        "comparative_analysis": "📊 Vergleichende Analyse",
        "implementation_code": "💻 Implementierungscode",
        "data_format": "📋 Erwartetes Datenformat",
        "examples": "📚 Analysebeispiele",
        "light": "Hell",
        "dark": "Dunkel",
        "best_for": "Am besten für",
        "loading": "Laden...",
        "error_no_api": "Bitte konfigurieren Sie ANTHROPIC_API_KEY",
        "error_no_files": "Bitte laden Sie Dateien zur Analyse hoch",
        "report_exported": "Bericht erfolgreich exportiert als",
        "specialized_in": "🎯 Spezialisiert auf:",
        "metrics_analyzed": "📊 Analysierte Metriken:",
        "what_analyzes": "🔍 Was spezifisch analysiert wird:",
        "tips": "💡 Tipps für bessere Ergebnisse:",
        "additional_specs": "📝 Zusätzliche Spezifikationen für die Analyse",
        "additional_specs_placeholder": "Fügen Sie spezifische Anforderungen oder Schwerpunktbereiche für die Analyse hinzu...",
    },
    "pt": {
        "title": "🧬 Analisador Comparativo de Modelos Biotecnológicos",
        "subtitle": "Especializado em análise comparativa de resultados de ajuste",
        "upload_files": "📁 Carregar resultados (CSV/Excel)",
        "select_model": "🤖 Modelo Claude",
        "select_language": "🌐 Idioma",
        "select_theme": "🎨 Tema",
        "detail_level": "📋 Nível de detalhe",
        "detailed": "Detalhado",
        "summarized": "Resumido",
        "analyze_button": "🚀 Analisar e Comparar",
        "export_format": "📄 Formato de exportação",
        "export_button": "💾 Exportar Relatório",
        "comparative_analysis": "📊 Análise Comparativa",
        "implementation_code": "💻 Código de Implementação",
        "data_format": "📋 Formato de dados esperado",
        "examples": "📚 Exemplos de análise",
        "light": "Claro",
        "dark": "Escuro",
        "best_for": "Melhor para",
        "loading": "Carregando...",
        "error_no_api": "Por favor configure ANTHROPIC_API_KEY",
        "error_no_files": "Por favor carregue arquivos para analisar",
        "report_exported": "Relatório exportado com sucesso como",
        "specialized_in": "🎯 Especializado em:",
        "metrics_analyzed": "📊 Métricas analisadas:",
        "what_analyzes": "🔍 O que analisa especificamente:",
        "tips": "💡 Dicas para melhores resultados:",
        "additional_specs": "📝 Especificações adicionais para a análise",
        "additional_specs_placeholder": "Adicione requisitos específicos ou áreas de foco para a análise...",
    },
}
# Available UI themes, keyed by the theme id used in the theme selector
THEMES = {
    'light': gr.themes.Soft(),
    'dark': gr.themes.Base(
        primary_hue="blue",
        secondary_hue="gray",
        neutral_hue="gray",
        font=["Arial", "sans-serif"]
    ).set(
        # Theme values must be CSS values or "*token" references; the previous
        # value "dark" is neither, so the light-mode body background was invalid.
        # Use the same near-black neutral as the dark-mode variant below.
        body_background_fill="*neutral_950",
        body_background_fill_dark="*neutral_950",
        button_primary_background_fill="*primary_600",
        button_primary_background_fill_hover="*primary_500",
        button_primary_text_color="white",
        block_background_fill="*neutral_800",
        block_border_color="*neutral_700",
        block_label_text_color="*neutral_200",
        block_title_text_color="*neutral_100",
        checkbox_background_color="*neutral_700",
        checkbox_background_color_selected="*primary_600",
        input_background_fill="*neutral_700",
        input_border_color="*neutral_600",
        input_placeholder_color="*neutral_400"
    )
}
# Enum of the analysis types the application distinguishes
class AnalysisType(Enum):
    """Kind of content found in an uploaded file."""

    # Text describing mathematical models
    MATHEMATICAL_MODEL = "mathematical_model"
    # Raw data still to be fitted
    DATA_FITTING = "data_fitting"
    # Output of a model fit (parameters and goodness-of-fit metrics)
    FITTING_RESULTS = "fitting_results"
    # Classification failed or was inconclusive
    UNKNOWN = "unknown"
# Modular record describing one mathematical model
@dataclass
class MathematicalModel:
    """Descriptor of a mathematical model kept in the model registry.

    The ``@dataclass`` decorator generates the keyword constructor that the
    registry relies on (``MathematicalModel(name=..., equation=..., ...)``);
    without it the class only carries annotations and cannot be instantiated
    with arguments.
    """

    name: str                # model name; used as the key inside its category
    equation: str            # equation written as display text
    parameters: List[str]    # parameter labels
    application: str         # short description of where the model applies
    sources: List[str]       # reference sources for the model
    category: str            # registry category the model is filed under
    biological_meaning: str  # plain-language interpretation of the model
# Scalable registry of mathematical models
class ModelRegistry:
    """Two-level catalogue of models: category first, then model name."""

    def __init__(self):
        self.models = {}
        self._initialize_default_models()

    def register_model(self, model: MathematicalModel):
        """Register a new mathematical model, replacing a same-named entry in its category."""
        category_models = self.models.setdefault(model.category, {})
        category_models[model.name] = model

    def get_model(self, category: str, name: str) -> MathematicalModel:
        """Fetch one model; the result is ``None`` when the category or name is unknown."""
        category_models = self.models.get(category, {})
        return category_models.get(name)

    def get_all_models(self) -> Dict:
        """Return the internal ``{category: {name: model}}`` mapping (not a copy)."""
        return self.models

    def _initialize_default_models(self):
        """Register the built-in biomass growth models."""
        growth_category = "crecimiento_biomasa"
        default_specs = (
            dict(
                name="Monod",
                equation="μ = μmax × (S / (Ks + S))",
                parameters=["μmax (h⁻¹)", "Ks (g/L)"],
                application="Crecimiento limitado por sustrato único",
                sources=["Cambridge", "MIT", "DTU"],
                biological_meaning="Describe cómo la velocidad de crecimiento depende de la concentración de sustrato limitante",
            ),
            dict(
                name="Logístico",
                equation="dX/dt = μmax × X × (1 - X/Xmax)",
                parameters=["μmax (h⁻¹)", "Xmax (g/L)"],
                application="Sistemas cerrados batch",
                sources=["Cranfield", "Swansea", "HAL Theses"],
                biological_meaning="Modela crecimiento limitado por capacidad de carga del sistema",
            ),
            dict(
                name="Gompertz",
                equation="X(t) = Xmax × exp(-exp((μmax × e / Xmax) × (λ - t) + 1))",
                parameters=["λ (h)", "μmax (h⁻¹)", "Xmax (g/L)"],
                application="Crecimiento con fase lag pronunciada",
                sources=["Lund University", "NC State"],
                biological_meaning="Incluye fase de adaptación (lag) seguida de crecimiento exponencial y estacionario",
            ),
        )
        for spec in default_specs:
            self.register_model(MathematicalModel(category=growth_category, **spec))
# Global registry instance; constructing it registers the default models
model_registry = ModelRegistry()
# Available Claude models: model id -> display name, description, token budget
# and recommended use. Rows are (model id, name, description, best_for).
CLAUDE_MODELS = {
    model_id: {
        "name": display_name,
        "description": description,
        "max_tokens": 4000,
        "best_for": best_for,
    }
    for model_id, display_name, description, best_for in (
        (
            "claude-opus-4-20250514",
            "Claude Opus 4 (Latest)",
            "Modelo más potente para desafíos complejos",
            "Análisis muy detallados y complejos",
        ),
        (
            "claude-sonnet-4-20250514",
            "Claude Sonnet 4 (Latest)",
            "Modelo inteligente y eficiente para uso cotidiano",
            "Análisis general, recomendado para la mayoría de casos",
        ),
        (
            "claude-3-5-haiku-20241022",
            "Claude 3.5 Haiku (Latest)",
            "Modelo más rápido para tareas diarias",
            "Análisis rápidos y económicos",
        ),
        (
            "claude-3-7-sonnet-20250219",
            "Claude 3.7 Sonnet",
            "Modelo avanzado de la serie 3.7",
            "Análisis equilibrados con alta calidad",
        ),
        (
            "claude-3-5-sonnet-20241022",
            "Claude 3.5 Sonnet (Oct 2024)",
            "Excelente balance entre velocidad y capacidad",
            "Análisis rápidos y precisos",
        ),
    )
}
class FileProcessor:
    """Stateless helpers that decode raw uploaded bytes into text or tables.

    The methods take no ``self`` and are declared ``@staticmethod`` so they
    work both on the class and on an instance (``FileProcessor().read_csv(b)``),
    which is how ``process_files`` calls them.
    """

    @staticmethod
    def extract_text_from_pdf(pdf_file) -> str:
        """Extract the text of every page of a PDF.

        Args:
            pdf_file: Raw bytes of the PDF document.

        Returns:
            The page texts, each followed by a newline. On failure, a string
            starting with ``"Error reading PDF: "`` is returned instead of
            raising.
        """
        try:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
            # `or ''` keeps the join safe should a page yield no text at all
            return "".join(
                f"{page.extract_text() or ''}\n" for page in pdf_reader.pages
            )
        except Exception as e:
            return f"Error reading PDF: {str(e)}"

    @staticmethod
    def read_csv(csv_file) -> Optional[pd.DataFrame]:
        """Parse CSV bytes into a DataFrame; returns ``None`` if parsing fails."""
        try:
            return pd.read_csv(io.BytesIO(csv_file))
        except Exception as e:
            # Callers treat None as "unreadable"; log the cause instead of hiding it
            print(f"Error reading CSV: {e}")
            return None

    @staticmethod
    def read_excel(excel_file) -> Optional[pd.DataFrame]:
        """Parse Excel bytes into a DataFrame; returns ``None`` if parsing fails."""
        try:
            return pd.read_excel(io.BytesIO(excel_file))
        except Exception as e:
            # Callers treat None as "unreadable"; log the cause instead of hiding it
            print(f"Error reading Excel: {e}")
            return None

    @staticmethod
    def extract_from_zip(zip_file) -> List[Tuple[str, bytes]]:
        """List the members of a ZIP archive as ``(name, content)`` pairs.

        Entries whose name starts with ``__MACOSX`` are skipped. If the archive
        cannot be processed, the error is printed and whatever was collected so
        far (possibly nothing) is returned.
        """
        files = []
        try:
            with zipfile.ZipFile(io.BytesIO(zip_file), 'r') as zip_ref:
                for file_name in zip_ref.namelist():
                    if not file_name.startswith('__MACOSX'):
                        files.append((file_name, zip_ref.read(file_name)))
        except Exception as e:
            print(f"Error processing ZIP: {e}")
        return files
class ReportExporter:
    """Render a Markdown-like analysis report to a DOCX or PDF file.

    Both exporters are ``@staticmethod``s, so they can be called on the class
    or on an instance. The report text is handled line by line: headings
    (``#``, ``##``, ``###``), fully bold lines (``**...**``), bullets
    (``- `` / ``* ``), separators (``---`` / ``===``) and plain paragraphs.
    """

    # Localized report title, keyed by language code (falls back to 'en')
    _TITLE_TEXT = {
        'en': 'Comparative Analysis Report - Biotechnological Models',
        'es': 'Informe de Análisis Comparativo - Modelos Biotecnológicos',
        'fr': 'Rapport d\'Analyse Comparative - Modèles Biotechnologiques',
        'de': 'Vergleichsanalysebericht - Biotechnologische Modelle',
        'pt': 'Relatório de Análise Comparativa - Modelos Biotecnológicos'
    }
    # Localized "generated on" label, keyed by language code (falls back to 'en')
    _DATE_TEXT = {
        'en': 'Generated on',
        'es': 'Generado el',
        'fr': 'Généré le',
        'de': 'Erstellt am',
        'pt': 'Gerado em'
    }
    # Emoji swapped for text tags in PDF output
    _PDF_EMOJI_REPLACEMENTS = {
        '📊': '[GRAPH]',
        '🎯': '[TARGET]',
        '🔍': '[SEARCH]',
        '💡': '[TIP]'
    }
    # "1. text" / "2) text": an explicit list marker, not just a leading digit
    _NUMBERED_ITEM = re.compile(r'^\d+[.)]\s+(.*)$')

    @staticmethod
    def _heading_text(line: str) -> str:
        """Drop only the leading '#' markers, keeping any '#' inside the title."""
        return line.lstrip('#').strip()

    @staticmethod
    def _localized(table: dict, language: str) -> str:
        """Pick the entry for ``language``, defaulting to English."""
        return table.get(language, table['en'])

    @staticmethod
    def _pdf_safe(text: str) -> str:
        """Make free text safe to embed in a ReportLab ``Paragraph``.

        Replaces the known emoji with text tags and escapes ``&``, ``<`` and
        ``>``, which ``Paragraph`` would otherwise interpret as markup.
        """
        from xml.sax.saxutils import escape

        for emoji, tag in ReportExporter._PDF_EMOJI_REPLACEMENTS.items():
            text = text.replace(emoji, tag)
        return escape(text)

    @staticmethod
    def export_to_docx(content: str, filename: str, language: str = 'en') -> str:
        """Export the report content to a DOCX file.

        Args:
            content: Report text in the Markdown-like format described on the class.
            filename: Path of the DOCX file to write.
            language: Language code for the title and date label.

        Returns:
            ``filename``, after the document has been saved.
        """
        doc = Document()
        # Configure the built-in styles used for the title and top-level headings
        title_style = doc.styles['Title']
        title_style.font.size = Pt(24)
        title_style.font.bold = True
        heading_style = doc.styles['Heading 1']
        heading_style.font.size = Pt(18)
        heading_style.font.bold = True
        # Title
        doc.add_heading(ReportExporter._localized(ReportExporter._TITLE_TEXT, language), 0)
        # Generation date
        date_label = ReportExporter._localized(ReportExporter._DATE_TEXT, language)
        doc.add_paragraph(f"{date_label}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        doc.add_paragraph()
        # Translate the content line by line
        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                continue
            numbered = ReportExporter._NUMBERED_ITEM.match(line)
            if line.startswith('###'):
                doc.add_heading(ReportExporter._heading_text(line), level=2)
            elif line.startswith('##'):
                doc.add_heading(ReportExporter._heading_text(line), level=1)
            elif line.startswith('#'):
                doc.add_heading(ReportExporter._heading_text(line), level=0)
            elif line.startswith('**') and line.endswith('**'):
                # Whole line in bold
                run = doc.add_paragraph().add_run(line.replace('**', ''))
                run.bold = True
            elif line.startswith('- ') or line.startswith('* '):
                # Bulleted list item
                doc.add_paragraph(line[2:], style='List Bullet')
            elif numbered:
                # Numbered list item: the style supplies the number, so the
                # literal "1." marker is dropped to avoid double numbering
                doc.add_paragraph(numbered.group(1), style='List Number')
            elif line == '---' or line.startswith('==='):
                # Separator
                doc.add_paragraph('_' * 50)
            else:
                # Plain paragraph
                doc.add_paragraph(line)
        # Save the document
        doc.save(filename)
        return filename

    @staticmethod
    def export_to_pdf(content: str, filename: str, language: str = 'en') -> str:
        """Export the report content to a PDF file.

        Args:
            content: Report text in the Markdown-like format described on the class.
            filename: Path of the PDF file to write.
            language: Language code for the title and date label.

        Returns:
            ``filename``, after the PDF has been built.
        """
        safe = ReportExporter._pdf_safe
        heading_text = ReportExporter._heading_text
        # Create the PDF document
        doc = SimpleDocTemplate(filename, pagesize=letter)
        story = []
        styles = getSampleStyleSheet()
        # Custom styles
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Title'],
            fontSize=24,
            textColor=colors.HexColor('#1f4788'),
            spaceAfter=30
        )
        heading_style = ParagraphStyle(
            'CustomHeading',
            parent=styles['Heading1'],
            fontSize=16,
            textColor=colors.HexColor('#2e5090'),
            spaceAfter=12
        )
        # Title
        story.append(Paragraph(
            ReportExporter._localized(ReportExporter._TITLE_TEXT, language), title_style))
        # Generation date
        date_label = ReportExporter._localized(ReportExporter._DATE_TEXT, language)
        story.append(Paragraph(
            f"{date_label}: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
        story.append(Spacer(1, 0.5*inch))
        # Translate the content line by line; all free text goes through
        # `safe` so characters such as '<' or '&' cannot break the PDF build
        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                story.append(Spacer(1, 0.2*inch))
            elif line.startswith('###'):
                story.append(Paragraph(safe(heading_text(line)), styles['Heading3']))
            elif line.startswith('##'):
                story.append(Paragraph(safe(heading_text(line)), styles['Heading2']))
            elif line.startswith('#'):
                story.append(Paragraph(safe(heading_text(line)), heading_style))
            elif line.startswith('**') and line.endswith('**'):
                text = safe(line.replace('**', ''))
                story.append(Paragraph(f"<b>{text}</b>", styles['Normal']))
            elif line.startswith('- ') or line.startswith('* '):
                story.append(Paragraph(f"• {safe(line[2:])}", styles['Normal']))
            elif line == '---' or line.startswith('==='):
                story.append(Spacer(1, 0.3*inch))
                story.append(Paragraph("_" * 70, styles['Normal']))
                story.append(Spacer(1, 0.3*inch))
            else:
                story.append(Paragraph(safe(line), styles['Normal']))
        # Build the PDF
        doc.build(story)
        return filename
class AIAnalyzer:
    """Claude-backed analysis of uploaded content and model-fitting results."""

    # Column-name fragments that mark a table as model-fitting results
    _FITTING_INDICATORS = (
        'r2', 'r_squared', 'rmse', 'mse', 'aic', 'bic',
        'parameter', 'param', 'coefficient', 'fit',
        'model', 'equation', 'goodness', 'chi_square',
        'p_value', 'confidence', 'standard_error', 'se'
    )

    def __init__(self, client, model_registry):
        """Keep the API client and the model registry for later use."""
        self.client = client
        self.model_registry = model_registry

    def detect_analysis_type(self, content: Union[str, pd.DataFrame]) -> AnalysisType:
        """Detect which kind of analysis the content calls for.

        A DataFrame is classified locally from its column names: any fitting
        indicator appearing in them means ``FITTING_RESULTS``, otherwise
        ``DATA_FITTING``. Text is classified by asking the model about its
        first 1000 characters.

        Returns:
            The detected ``AnalysisType``; ``UNKNOWN`` when the reply is not
            recognised or the request fails.
        """
        if isinstance(content, pd.DataFrame):
            # str() keeps non-string column labels (e.g. integer headers) from crashing
            joined_columns = ' '.join(str(col).lower() for col in content.columns)
            if any(indicator in joined_columns for indicator in self._FITTING_INDICATORS):
                return AnalysisType.FITTING_RESULTS
            return AnalysisType.DATA_FITTING
        prompt = """
Analyze this content and determine if it is:
1. A scientific article describing biotechnological mathematical models
2. Experimental data for parameter fitting
3. Model fitting results (with parameters, R², RMSE, etc.)
Reply only with: "MODEL", "DATA" or "RESULTS"
"""
        try:
            response = self.client.messages.create(
                model="claude-3-haiku-20240307",
                max_tokens=10,
                messages=[{"role": "user", "content": f"{prompt}\n\n{content[:1000]}"}]
            )
            result = response.content[0].text.strip().upper()
            if "MODEL" in result:
                return AnalysisType.MATHEMATICAL_MODEL
            elif "RESULTS" in result:
                return AnalysisType.FITTING_RESULTS
            elif "DATA" in result:
                return AnalysisType.DATA_FITTING
            else:
                return AnalysisType.UNKNOWN
        except Exception:
            # Best effort: a failed request means "unknown". Unlike a bare
            # `except:`, this lets KeyboardInterrupt/SystemExit propagate.
            return AnalysisType.UNKNOWN

    def get_language_prompt_prefix(self, language: str) -> str:
        """Return the "please respond in ..." prompt prefix for a language code.

        Unknown codes fall back to the English prefix.
        """
        prefixes = {
            'en': "Please respond in English. ",
            'es': "Por favor responde en español. ",
            'fr': "Veuillez répondre en français. ",
            'de': "Bitte antworten Sie auf Deutsch. ",
            'pt': "Por favor responda em português. "
        }
        return prefixes.get(language, prefixes['en'])

    @staticmethod
    def _build_data_summary(data: pd.DataFrame) -> str:
        """Describe the table (columns, row count, full data, statistics) as prompt text."""
        return f"""
FITTING RESULTS DATA:
Data structure:
- Columns: {list(data.columns)}
- Number of models evaluated: {len(data)}
Complete data:
{data.to_string()}
Descriptive statistics:
{data.describe().to_string()}
"""

    @staticmethod
    def _build_analysis_prompt(detail_level: str, lang_prefix: str, user_specs_section: str) -> str:
        """Build the analysis prompt: the detailed variant, or the summarized one otherwise."""
        if detail_level == "detailed":
            return f"""
{lang_prefix}
You are an expert in biotechnology and mathematical modeling. Analyze these kinetic/biotechnological model fitting results.
{user_specs_section}
DETAIL LEVEL: DETAILED - Provide comprehensive analysis BY EXPERIMENT
PERFORM A COMPREHENSIVE COMPARATIVE ANALYSIS PER EXPERIMENT:
1. **EXPERIMENT IDENTIFICATION AND OVERVIEW**
- List ALL experiments/conditions tested (e.g., pH levels, temperatures, time points)
- For EACH experiment, identify:
* Experimental conditions
* Number of models tested
* Variables measured (biomass, substrate, product)
2. **MODEL IDENTIFICATION AND CLASSIFICATION BY EXPERIMENT**
For EACH EXPERIMENT separately:
- Identify ALL fitted mathematical models BY NAME
- Classify them: biomass growth, substrate consumption, product formation
- Show the mathematical equation of each model
- List parameter values obtained for that specific experiment
3. **COMPARATIVE ANALYSIS PER EXPERIMENT**
Create a section for EACH EXPERIMENT showing:
**EXPERIMENT [Name/Condition]:**
a) **BIOMASS MODELS** (if applicable):
- Best model: [Name] with R²=[value], RMSE=[value]
- Parameters: μmax=[value], Xmax=[value], etc.
- Ranking of all biomass models tested
b) **SUBSTRATE MODELS** (if applicable):
- Best model: [Name] with R²=[value], RMSE=[value]
- Parameters: Ks=[value], Yxs=[value], etc.
- Ranking of all substrate models tested
c) **PRODUCT MODELS** (if applicable):
- Best model: [Name] with R²=[value], RMSE=[value]
- Parameters: α=[value], β=[value], etc.
- Ranking of all product models tested
4. **DETAILED COMPARATIVE TABLES**
**Table 1: Summary by Experiment and Variable Type**
| Experiment | Variable | Best Model | R² | RMSE | Key Parameters | Ranking |
|------------|----------|------------|-------|------|----------------|---------|
| Exp1 | Biomass | [Name] | [val] | [val]| μmax=X | 1 |
| Exp1 | Substrate| [Name] | [val] | [val]| Ks=Y | 1 |
| Exp1 | Product | [Name] | [val] | [val]| α=Z | 1 |
| Exp2 | Biomass | [Name] | [val] | [val]| μmax=X2 | 1 |
**Table 2: Complete Model Comparison Across All Experiments**
| Model Name | Type | Exp1_R² | Exp1_RMSE | Exp2_R² | Exp2_RMSE | Avg_R² | Best_For |
5. **PARAMETER ANALYSIS ACROSS EXPERIMENTS**
- Compare how parameters change between experiments
- Identify trends (e.g., μmax increases with temperature)
- Calculate average parameters and variability
- Suggest optimal conditions based on parameters
6. **BIOLOGICAL INTERPRETATION BY EXPERIMENT**
For each experiment, explain:
- What the parameter values mean biologically
- Whether values are realistic for the conditions
- Key differences between experiments
- Critical control parameters identified
7. **OVERALL BEST MODELS DETERMINATION**
- **BEST BIOMASS MODEL OVERALL**: [Name] - performs best in [X] out of [Y] experiments
- **BEST SUBSTRATE MODEL OVERALL**: [Name] - average R²=[value]
- **BEST PRODUCT MODEL OVERALL**: [Name] - most consistent across conditions
Justify with numerical evidence from multiple experiments.
8. **CONCLUSIONS AND RECOMMENDATIONS**
- Which models are most robust across different conditions
- Specific models to use for each experimental condition
- Confidence intervals and prediction reliability
- Scale-up recommendations with specific values
Use Markdown format with clear structure. Include ALL numerical values from the data.
Create clear sections for EACH EXPERIMENT.
"""
        # Any other detail level gets the summarized prompt
        return f"""
{lang_prefix}
You are an expert in biotechnology. Provide a CONCISE but COMPLETE analysis BY EXPERIMENT.
{user_specs_section}
DETAIL LEVEL: SUMMARIZED - Be concise but include all experiments and essential information
PROVIDE A FOCUSED COMPARATIVE ANALYSIS:
1. **EXPERIMENTS OVERVIEW**
- Total experiments analyzed: [number]
- Conditions tested: [list]
- Variables measured: biomass/substrate/product
2. **BEST MODELS BY EXPERIMENT - QUICK SUMMARY**
📊 **EXPERIMENT 1 [Name/Condition]:**
- Biomass: [Model] (R²=[value])
- Substrate: [Model] (R²=[value])
- Product: [Model] (R²=[value])
📊 **EXPERIMENT 2 [Name/Condition]:**
- Biomass: [Model] (R²=[value])
- Substrate: [Model] (R²=[value])
- Product: [Model] (R²=[value])
[Continue for all experiments...]
3. **OVERALL WINNERS ACROSS ALL EXPERIMENTS**
🏆 **Best Models Overall:**
- **Biomass**: [Model] - Best in [X]/[Y] experiments
- **Substrate**: [Model] - Average R²=[value]
- **Product**: [Model] - Most consistent performance
4. **QUICK COMPARISON TABLE**
| Experiment | Best Biomass | Best Substrate | Best Product | Overall R² |
|------------|--------------|----------------|--------------|------------|
| Exp1 | [Model] | [Model] | [Model] | [avg] |
| Exp2 | [Model] | [Model] | [Model] | [avg] |
5. **KEY FINDINGS**
- Parameter ranges across experiments: μmax=[min-max], Ks=[min-max]
- Best conditions identified: [specific values]
- Most robust models: [list with reasons]
6. **PRACTICAL RECOMMENDATIONS**
- For biomass prediction: Use [Model]
- For substrate monitoring: Use [Model]
- For product estimation: Use [Model]
- Critical parameters: [list with values]
Keep it concise but include ALL experiments and model names with their key metrics.
"""

    @staticmethod
    def _build_code_prompt(lang_prefix: str, data: pd.DataFrame) -> str:
        """Build the prompt asking for implementation code with the data embedded."""
        return f"""
{lang_prefix}
Based on the analysis and this actual data:
{data.to_string()}
Generate Python code that:
1. Creates a complete analysis system with the ACTUAL NUMERICAL VALUES from the data
2. Implements analysis BY EXPERIMENT showing:
- Best models for each experiment
- Comparison across experiments
- Parameter evolution between conditions
3. Includes visualization functions that:
- Show results PER EXPERIMENT
- Compare models across experiments
- Display parameter trends
4. Shows the best model for biomass, substrate, and product separately
The code must include:
- Data loading with experiment identification
- Model comparison by experiment and variable type
- Visualization showing results per experiment
- Overall best model selection with justification
- Functions to predict using the best models for each category
Make sure to include comments indicating which model won for each variable type and why.
Format: Complete, executable Python code with actual data values embedded.
"""

    @staticmethod
    def _best_r2(data: pd.DataFrame):
        """Return ``(best R2, model with the best R2)``; either may be ``None``.

        The model lookup is skipped when 'R2' holds no values at all, so an
        empty or all-NaN column cannot discard an otherwise finished analysis.
        """
        if 'R2' not in data.columns:
            return None, None
        best_r2 = data['R2'].max()
        best_model = None
        if 'Model' in data.columns and data['R2'].notna().any():
            best_model = data.loc[data['R2'].idxmax()]['Model']
        return best_r2, best_model

    def analyze_fitting_results(self, data: pd.DataFrame, claude_model: str, detail_level: str = "detailed",
                                language: str = "en", additional_specs: str = "") -> Dict:
        """Analyze model-fitting results, with language and user-specification support.

        Two requests are sent to ``claude_model``: the comparative analysis
        and the implementation code.

        Args:
            data: Table of fitting results.
            claude_model: Model id used for both requests.
            detail_level: ``"detailed"`` selects the detailed prompt; any other
                value selects the summarized one.
            language: Language code for the response-language prefix.
            additional_specs: Optional user requirements added to the prompt.

        Returns:
            A dict with keys ``tipo``, ``analisis_completo``,
            ``codigo_implementacion`` and ``resumen_datos``. If any step fails
            (including summarising the table), ``{"error": <message>}`` is
            returned instead.
        """
        try:
            # Built inside the try so an unusable table (e.g. one without
            # columns) is reported through the error dict instead of raising
            data_summary = self._build_data_summary(data)
            # Row records, passed through for the generated code
            data_dict = data.to_dict('records')
            lang_prefix = self.get_language_prompt_prefix(language)
            # Add the user's extra specifications, if any
            user_specs_section = f"""
USER ADDITIONAL SPECIFICATIONS:
{additional_specs}
Please ensure to address these specific requirements in your analysis.
""" if additional_specs else ""
            prompt = self._build_analysis_prompt(detail_level, lang_prefix, user_specs_section)
            response = self.client.messages.create(
                model=claude_model,
                max_tokens=4000,
                messages=[{
                    "role": "user",
                    "content": f"{prompt}\n\n{data_summary}"
                }]
            )
            # Second request: code that embeds the actual numerical values
            code_response = self.client.messages.create(
                model=claude_model,
                max_tokens=3000,
                messages=[{
                    "role": "user",
                    "content": self._build_code_prompt(lang_prefix, data)
                }]
            )
            best_r2, best_model = self._best_r2(data)
            metric_names = ('r2', 'rmse', 'aic', 'bic', 'mse')
            return {
                "tipo": "Comparative Analysis of Mathematical Models",
                "analisis_completo": response.content[0].text,
                "codigo_implementacion": code_response.content[0].text,
                "resumen_datos": {
                    "n_modelos": len(data),
                    "columnas": list(data.columns),
                    "metricas_disponibles": [
                        col for col in data.columns
                        if any(metric in str(col).lower() for metric in metric_names)
                    ],
                    "mejor_r2": best_r2,
                    "mejor_modelo_r2": best_model,
                    "datos_completos": data_dict  # all rows, for the generated code
                }
            }
        except Exception as e:
            return {"error": str(e)}
def process_files(files, claude_model: str, detail_level: str = "detailed",
                  language: str = "en", additional_specs: str = "") -> Tuple[str, str]:
    """Process uploaded files, with language and user-specification support.

    Only ``.csv``, ``.xlsx`` and ``.xls`` files are read. Tables detected as
    fitting results are sent to the analyzer; its analysis text and generated
    code are collected.

    Args:
        files: Uploaded files, each either a filesystem path or an object
            exposing the path as ``.name``; ``None`` entries are skipped.
            NOTE(review): which of the two shapes arrives depends on how the
            upload component is configured — confirm against the UI code.
        claude_model: Model id passed to the analyzer.
        detail_level: Detail level passed to the analyzer.
        language: Language code; ``'es'`` selects Spanish section headings,
            anything else English ones.
        additional_specs: Extra user requirements passed to the analyzer.

    Returns:
        ``(analysis_text, code_text)``. When no code was produced, the code
        comes from ``generate_implementation_code``.
    """
    processor = FileProcessor()
    analyzer = AIAnalyzer(client, model_registry)
    results = []
    all_code = []
    for file in files or []:
        if file is None:
            continue
        # Resolve the path once and use it for both the extension check and
        # the read; previously a plain string path crashed on `file.name`
        file_path = file if isinstance(file, (str, Path)) else getattr(file, 'name', None)
        if file_path is None:
            continue
        file_name = str(file_path)
        file_ext = Path(file_name).suffix.lower()
        if file_ext not in ['.csv', '.xlsx', '.xls']:
            continue
        with open(file_path, 'rb') as f:
            file_content = f.read()
        if language == 'es':
            results.append(f"## 📊 Análisis de Resultados: {file_name}")
        else:
            results.append(f"## 📊 Results Analysis: {file_name}")
        if file_ext == '.csv':
            df = processor.read_csv(file_content)
        else:
            df = processor.read_excel(file_content)
        if df is not None:
            analysis_type = analyzer.detect_analysis_type(df)
            if analysis_type == AnalysisType.FITTING_RESULTS:
                result = analyzer.analyze_fitting_results(
                    df, claude_model, detail_level, language, additional_specs
                )
                if language == 'es':
                    results.append("### 🎯 ANÁLISIS COMPARATIVO DE MODELOS MATEMÁTICOS")
                else:
                    results.append("### 🎯 COMPARATIVE ANALYSIS OF MATHEMATICAL MODELS")
                if "error" in result:
                    # Surface analyzer failures instead of emitting an empty section
                    results.append(f"Error: {result['error']}")
                else:
                    results.append(result.get("analisis_completo", ""))
                if "codigo_implementacion" in result:
                    all_code.append(result["codigo_implementacion"])
        results.append("\n---\n")
    analysis_text = "\n".join(results)
    if all_code:
        # The banner is the separator BETWEEN code chunks. The old expression
        # prepended it once instead, fusing it with the first line of code.
        separator = "\n\n# " + "=" * 50 + "\n\n"
        code_text = separator.join(all_code)
    else:
        code_text = generate_implementation_code(analysis_text)
    return analysis_text, code_text
def generate_implementation_code(analysis_results: str) -> str:
    """Return a static Python script template for per-experiment model comparison.

    The template defines ``ExperimentalModelAnalyzer`` plus an example run and
    is used as fallback content when no analyzer-generated code is available.

    Args:
        analysis_results: Accepted for interface compatibility; the template
            does not depend on it.

    Returns:
        The source code of the template as a single string.
    """
    # Inside the template, docstring quotes and "\n" escapes are backslash-escaped
    # so that they survive as literal text in the generated script.
    code = """
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from scipy.optimize import curve_fit, differential_evolution
from sklearn.metrics import r2_score, mean_squared_error
import seaborn as sns
from typing import Dict, List, Tuple, Optional

# Visualization configuration
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")


class ExperimentalModelAnalyzer:
    \"\"\"
    Class for comparative analysis of biotechnological models across multiple experiments.
    Analyzes biomass, substrate and product models separately for each experimental condition.
    \"\"\"

    def __init__(self):
        self.results_df = None
        self.experiments = {}
        self.best_models_by_experiment = {}
        self.overall_best_models = {
            'biomass': None,
            'substrate': None,
            'product': None
        }
        # Column names used by the most recent analyze_by_experiment() call
        self._columns = {'model': 'Model', 'r2': 'R2', 'rmse': 'RMSE'}

    def load_results(self, file_path: str = None, data_dict: dict = None) -> pd.DataFrame:
        \"\"\"Load fitting results from CSV/Excel file or dictionary\"\"\"
        if data_dict:
            self.results_df = pd.DataFrame(data_dict)
        elif file_path:
            if file_path.endswith('.csv'):
                self.results_df = pd.read_csv(file_path)
            else:
                self.results_df = pd.read_excel(file_path)
        else:
            raise ValueError("Provide either file_path or data_dict")

        print(f"✅ Data loaded: {len(self.results_df)} models")
        print(f"📊 Available columns: {list(self.results_df.columns)}")

        # Identify experiments
        if 'Experiment' in self.results_df.columns:
            self.experiments = self.results_df.groupby('Experiment').groups
            print(f"🧪 Experiments found: {list(self.experiments.keys())}")

        return self.results_df

    def analyze_by_experiment(self,
                              experiment_col: str = 'Experiment',
                              model_col: str = 'Model',
                              type_col: str = 'Type',
                              r2_col: str = 'R2',
                              rmse_col: str = 'RMSE') -> Dict:
        \"\"\"
        Analyze models by experiment and variable type.
        Identifies best models for biomass, substrate, and product in each experiment.
        \"\"\"
        if self.results_df is None:
            raise ValueError("First load data with load_results()")

        # Remember the column names so the cross-experiment aggregation
        # reads the same record keys that are written below
        self._columns = {'model': model_col, 'r2': r2_col, 'rmse': rmse_col}

        results_by_exp = {}

        # Get unique experiments
        if experiment_col in self.results_df.columns:
            experiments = self.results_df[experiment_col].unique()
        else:
            experiments = ['All_Data']
            self.results_df[experiment_col] = 'All_Data'

        print("\\n" + "="*80)
        print("📊 ANALYSIS BY EXPERIMENT AND VARIABLE TYPE")
        print("="*80)

        for exp in experiments:
            print(f"\\n🧪 EXPERIMENT: {exp}")
            print("-"*50)

            exp_data = self.results_df[self.results_df[experiment_col] == exp]
            results_by_exp[exp] = {}

            # Analyze by variable type if available
            if type_col in exp_data.columns:
                var_types = exp_data[type_col].unique()

                for var_type in var_types:
                    var_data = exp_data[exp_data[type_col] == var_type]

                    if not var_data.empty:
                        # Find best model for this variable type
                        best_idx = var_data[r2_col].idxmax()
                        best_model = var_data.loc[best_idx]

                        results_by_exp[exp][var_type] = {
                            'best_model': best_model[model_col],
                            'r2': best_model[r2_col],
                            'rmse': best_model[rmse_col],
                            'all_models': var_data[[model_col, r2_col, rmse_col]].to_dict('records')
                        }

                        print(f"\\n  📈 {str(var_type).upper()}:")
                        print(f"    Best Model: {best_model[model_col]}")
                        print(f"    R² = {best_model[r2_col]:.4f}")
                        print(f"    RMSE = {best_model[rmse_col]:.4f}")

                        # Show all models for this variable
                        print(f"\\n    All {var_type} models tested:")
                        for _, row in var_data.iterrows():
                            print(f"      - {row[model_col]}: R²={row[r2_col]:.4f}, RMSE={row[rmse_col]:.4f}")
            else:
                # If no type column, analyze all models together
                best_idx = exp_data[r2_col].idxmax()
                best_model = exp_data.loc[best_idx]

                results_by_exp[exp]['all'] = {
                    'best_model': best_model[model_col],
                    'r2': best_model[r2_col],
                    'rmse': best_model[rmse_col],
                    'all_models': exp_data[[model_col, r2_col, rmse_col]].to_dict('records')
                }

        self.best_models_by_experiment = results_by_exp

        # Determine overall best models
        self._determine_overall_best_models()

        return results_by_exp

    def _determine_overall_best_models(self):
        \"\"\"Determine the best models across all experiments\"\"\"
        print("\\n" + "="*80)
        print("🏆 OVERALL BEST MODELS ACROSS ALL EXPERIMENTS")
        print("="*80)

        model_key = self._columns['model']
        r2_key = self._columns['r2']
        rmse_key = self._columns['rmse']

        # Aggregate performance by model and type
        model_performance = {}

        for exp, exp_results in self.best_models_by_experiment.items():
            for var_type, var_results in exp_results.items():
                if var_type not in model_performance:
                    model_performance[var_type] = {}

                for model_data in var_results['all_models']:
                    model_name = model_data[model_key]
                    if model_name not in model_performance[var_type]:
                        model_performance[var_type][model_name] = {
                            'r2_values': [],
                            'rmse_values': [],
                            'experiments': []
                        }

                    model_performance[var_type][model_name]['r2_values'].append(model_data[r2_key])
                    model_performance[var_type][model_name]['rmse_values'].append(model_data[rmse_key])
                    model_performance[var_type][model_name]['experiments'].append(exp)

        # Calculate average performance and select best
        for var_type, models in model_performance.items():
            # R² has no lower bound, so start below every possible average
            best_avg_r2 = -np.inf
            best_model = None
            type_label = str(var_type)

            print(f"\\n📊 {type_label.upper()} MODELS:")

            for model_name, perf_data in models.items():
                avg_r2 = np.mean(perf_data['r2_values'])
                avg_rmse = np.mean(perf_data['rmse_values'])
                n_exp = len(perf_data['experiments'])

                print(f"  {model_name}:")
                print(f"    Average R² = {avg_r2:.4f}")
                print(f"    Average RMSE = {avg_rmse:.4f}")
                print(f"    Tested in {n_exp} experiments")

                if avg_r2 > best_avg_r2:
                    best_avg_r2 = avg_r2
                    best_model = {
                        'name': model_name,
                        'avg_r2': avg_r2,
                        'avg_rmse': avg_rmse,
                        'n_experiments': n_exp
                    }

            # best_model stays None when every average is NaN
            if best_model is not None and type_label.lower() in ['biomass', 'substrate', 'product']:
                self.overall_best_models[type_label.lower()] = best_model
                print(f"\\n  🏆 BEST {type_label.upper()} MODEL: {best_model['name']} (Avg R²={best_model['avg_r2']:.4f})")

    def create_comparison_visualizations(self):
        \"\"\"Create visualizations comparing models across experiments\"\"\"
        if not self.best_models_by_experiment:
            raise ValueError("First run analyze_by_experiment()")

        # Prepare data for visualization
        experiments = []
        biomass_r2 = []
        substrate_r2 = []
        product_r2 = []

        for exp, results in self.best_models_by_experiment.items():
            experiments.append(exp)
            # Match variable types case-insensitively, as the overall ranking does
            by_type = {str(key).lower(): value for key, value in results.items()}
            biomass_r2.append(by_type.get('biomass', {}).get('r2', 0))
            substrate_r2.append(by_type.get('substrate', {}).get('r2', 0))
            product_r2.append(by_type.get('product', {}).get('r2', 0))

        # Create figure with subplots
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle('Model Performance Comparison Across Experiments', fontsize=16)

        # 1. R² comparison by experiment and variable type
        ax1 = axes[0, 0]
        x = np.arange(len(experiments))
        width = 0.25

        ax1.bar(x - width, biomass_r2, width, label='Biomass', color='green', alpha=0.8)
        ax1.bar(x, substrate_r2, width, label='Substrate', color='blue', alpha=0.8)
        ax1.bar(x + width, product_r2, width, label='Product', color='red', alpha=0.8)

        ax1.set_xlabel('Experiment')
        ax1.set_ylabel('R²')
        ax1.set_title('Best Model R² by Experiment and Variable Type')
        ax1.set_xticks(x)
        ax1.set_xticklabels(experiments, rotation=45, ha='right')
        ax1.legend()
        ax1.grid(True, alpha=0.3)

        # Add value labels
        for i, (b, s, p) in enumerate(zip(biomass_r2, substrate_r2, product_r2)):
            if b > 0: ax1.text(i - width, b + 0.01, f'{b:.3f}', ha='center', va='bottom', fontsize=8)
            if s > 0: ax1.text(i, s + 0.01, f'{s:.3f}', ha='center', va='bottom', fontsize=8)
            if p > 0: ax1.text(i + width, p + 0.01, f'{p:.3f}', ha='center', va='bottom', fontsize=8)

        # 2. Model frequency heatmap
        ax2 = axes[0, 1]
        # This would show which models appear most frequently as best
        # Implementation depends on actual data structure
        ax2.text(0.5, 0.5, 'Model Frequency Analysis\\n(Most Used Models)',
                 ha='center', va='center', transform=ax2.transAxes)
        ax2.set_title('Most Frequently Selected Models')

        # 3. Parameter evolution across experiments
        ax3 = axes[1, 0]
        ax3.text(0.5, 0.5, 'Parameter Evolution\\nAcross Experiments',
                 ha='center', va='center', transform=ax3.transAxes)
        ax3.set_title('Parameter Trends')

        # 4. Overall best models summary
        ax4 = axes[1, 1]
        ax4.axis('off')

        summary_text = "🏆 OVERALL BEST MODELS\\n\\n"
        for var_type, model_info in self.overall_best_models.items():
            if model_info:
                summary_text += f"{var_type.upper()}:\\n"
                summary_text += f"  Model: {model_info['name']}\\n"
                summary_text += f"  Avg R²: {model_info['avg_r2']:.4f}\\n"
                summary_text += f"  Tested in: {model_info['n_experiments']} experiments\\n\\n"

        ax4.text(0.1, 0.9, summary_text, transform=ax4.transAxes,
                 fontsize=12, verticalalignment='top', fontfamily='monospace')
        ax4.set_title('Overall Best Models Summary')

        plt.tight_layout()
        plt.show()

    def generate_summary_table(self) -> pd.DataFrame:
        \"\"\"Generate a summary table of best models by experiment and type\"\"\"
        summary_data = []

        for exp, results in self.best_models_by_experiment.items():
            for var_type, var_results in results.items():
                summary_data.append({
                    'Experiment': exp,
                    'Variable_Type': var_type,
                    'Best_Model': var_results['best_model'],
                    'R2': var_results['r2'],
                    'RMSE': var_results['rmse']
                })

        summary_df = pd.DataFrame(summary_data)

        print("\\n📋 SUMMARY TABLE: BEST MODELS BY EXPERIMENT AND VARIABLE TYPE")
        print("="*80)
        print(summary_df.to_string(index=False))

        return summary_df


# Example usage
if __name__ == "__main__":
    print("🧬 Experimental Model Comparison System")
    print("="*60)

    # Example data structure with experiments
    example_data = {
        'Experiment': ['pH_7.0', 'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5', 'pH_7.5',
                       'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5',
                       'pH_7.0', 'pH_7.0', 'pH_7.5', 'pH_7.5'],
        'Model': ['Monod', 'Logistic', 'Gompertz', 'Monod', 'Logistic', 'Gompertz',
                  'First_Order', 'Monod_Substrate', 'First_Order', 'Monod_Substrate',
                  'Luedeking_Piret', 'Linear', 'Luedeking_Piret', 'Linear'],
        'Type': ['Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass', 'Biomass',
                 'Substrate', 'Substrate', 'Substrate', 'Substrate',
                 'Product', 'Product', 'Product', 'Product'],
        'R2': [0.9845, 0.9912, 0.9956, 0.9789, 0.9834, 0.9901,
               0.9723, 0.9856, 0.9698, 0.9812,
               0.9634, 0.9512, 0.9687, 0.9423],
        'RMSE': [0.0234, 0.0189, 0.0145, 0.0267, 0.0223, 0.0178,
                 0.0312, 0.0245, 0.0334, 0.0289,
                 0.0412, 0.0523, 0.0389, 0.0567],
        'mu_max': [0.45, 0.48, 0.52, 0.42, 0.44, 0.49,
                   None, None, None, None, None, None, None, None],
        'Ks': [None, None, None, None, None, None,
               2.1, 1.8, 2.3, 1.9, None, None, None, None]
    }

    # Create analyzer
    analyzer = ExperimentalModelAnalyzer()

    # Load data
    analyzer.load_results(data_dict=example_data)

    # Analyze by experiment
    results = analyzer.analyze_by_experiment()

    # Create visualizations
    analyzer.create_comparison_visualizations()

    # Generate summary table
    summary = analyzer.generate_summary_table()

    print("\\n✨ Analysis complete! Best models identified for each experiment and variable type.")
"""
    return code
# Global state holding the most recent results
class AppState:
    """Mutable holder for the latest analysis text, generated code and UI language."""

    def __init__(self):
        # Start with empty outputs and the English interface.
        self.current_analysis, self.current_code = "", ""
        self.current_language = "en"


# Single module-level instance read and written by the UI callbacks and export_report().
app_state = AppState()
def export_report(export_format: str, language: str) -> Tuple[str, str]:
    """Export the stored analysis to a DOCX or PDF report.

    Args:
        export_format: ``"DOCX"`` selects the DOCX exporter; any other value
            selects the PDF exporter.
        language: Language code for the status message and the exporter.
            Unknown codes fall back to English for the status message.

    Returns:
        ``(status_message, filename)``. ``filename`` is an empty string when
        there is nothing to export or the export failed; failures are reported
        in ``status_message`` instead of being raised.
    """
    if not app_state.current_analysis:
        error_msg = {
            'en': "No analysis available to export",
            'es': "No hay análisis disponible para exportar",
            'fr': "Aucune analyse disponible pour exporter",
            'de': "Keine Analyse zum Exportieren verfügbar",
            'pt': "Nenhuma análise disponível para exportar"
        }
        return error_msg.get(language, error_msg['en']), ""

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    try:
        if export_format == "DOCX":
            filename = f"biotech_analysis_report_{timestamp}.docx"
            ReportExporter.export_to_docx(app_state.current_analysis, filename, language)
        else:  # PDF
            filename = f"biotech_analysis_report_{timestamp}.pdf"
            ReportExporter.export_to_pdf(app_state.current_analysis, filename, language)

        # Use the same English fallback as the error path above. A missing
        # translation must not turn a successful export into an "Error" status
        # with no file returned.
        english = TRANSLATIONS['en']
        texts = TRANSLATIONS.get(language) or english
        success_msg = texts.get('report_exported') or english['report_exported']
        return f"{success_msg} {filename}", filename
    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"Error: {str(e)}", ""
# Gradio interface with multi-language and theme support
def create_interface():
    """Build the Gradio Blocks UI, wire its event handlers and return it.

    Labels are rendered initially in ``current_language`` and re-labelled by
    the language dropdown. Analysis results are kept in the module-level
    ``app_state`` so the export button can reuse them.

    Returns:
        The ``gr.Blocks`` instance (``demo``); it is not launched here.
    """
    # Initial state
    current_theme = "light"
    current_language = "en"

    def update_interface_language(language):
        """Return one ``gr.update`` per component, in the order of the ``outputs`` list of ``language_selector.change``."""
        app_state.current_language = language
        t = TRANSLATIONS[language]

        return [
            gr.update(value=f"# {t['title']}"),  # title_text
            gr.update(value=t['subtitle']),  # subtitle_text
            gr.update(label=t['upload_files']),  # files_input
            gr.update(label=t['select_model']),  # model_selector
            gr.update(label=t['select_language']),  # language_selector
            gr.update(label=t['select_theme']),  # theme_selector
            gr.update(label=t['detail_level']),  # detail_level
            gr.update(label=t['additional_specs'], placeholder=t['additional_specs_placeholder']),  # additional_specs
            gr.update(value=t['analyze_button']),  # analyze_btn
            gr.update(label=t['export_format']),  # export_format
            gr.update(value=t['export_button']),  # export_btn
            gr.update(label=t['comparative_analysis']),  # analysis_output
            gr.update(label=t['implementation_code']),  # code_output
            gr.update(label=t['data_format'])  # data_format_accordion
        ]

    def process_and_store(files, model, detail, language, additional_specs):
        """Run ``process_files`` and store its two outputs in ``app_state``."""
        if not files:
            error_msg = TRANSLATIONS[language]['error_no_files']
            return error_msg, ""

        analysis, code = process_files(files, model, detail, language, additional_specs)
        app_state.current_analysis = analysis
        app_state.current_code = code
        return analysis, code

    with gr.Blocks(theme=THEMES[current_theme]) as demo:
        # UI components
        # Header row: title/subtitle on the left, language and theme selectors on the right.
        with gr.Row():
            with gr.Column(scale=3):
                title_text = gr.Markdown(f"# {TRANSLATIONS[current_language]['title']}")
                subtitle_text = gr.Markdown(TRANSLATIONS[current_language]['subtitle'])
            with gr.Column(scale=1):
                with gr.Row():
                    language_selector = gr.Dropdown(
                        choices=[("English", "en"), ("Español", "es"), ("Français", "fr"),
                                 ("Deutsch", "de"), ("Português", "pt")],
                        value="en",
                        label=TRANSLATIONS[current_language]['select_language'],
                        interactive=True
                    )
                    theme_selector = gr.Dropdown(
                        choices=[("Light", "light"), ("Dark", "dark")],
                        value="light",
                        label=TRANSLATIONS[current_language]['select_theme'],
                        interactive=True
                    )

        # Main row: inputs and export controls on the left, outputs on the right.
        with gr.Row():
            with gr.Column(scale=1):
                files_input = gr.File(
                    label=TRANSLATIONS[current_language]['upload_files'],
                    file_count="multiple",
                    file_types=[".csv", ".xlsx", ".xls", ".pdf", ".zip"],
                    type="filepath"
                )

                model_selector = gr.Dropdown(
                    choices=list(CLAUDE_MODELS.keys()),
                    value="claude-3-5-sonnet-20241022",
                    label=TRANSLATIONS[current_language]['select_model'],
                    info=f"{TRANSLATIONS[current_language]['best_for']}: {CLAUDE_MODELS['claude-3-5-sonnet-20241022']['best_for']}"
                )

                detail_level = gr.Radio(
                    choices=[
                        (TRANSLATIONS[current_language]['detailed'], "detailed"),
                        (TRANSLATIONS[current_language]['summarized'], "summarized")
                    ],
                    value="detailed",
                    label=TRANSLATIONS[current_language]['detail_level']
                )

                # Free-text input for additional specifications
                additional_specs = gr.Textbox(
                    label=TRANSLATIONS[current_language]['additional_specs'],
                    placeholder=TRANSLATIONS[current_language]['additional_specs_placeholder'],
                    lines=3,
                    max_lines=5,
                    interactive=True
                )

                analyze_btn = gr.Button(
                    TRANSLATIONS[current_language]['analyze_button'],
                    variant="primary",
                    size="lg"
                )

                gr.Markdown("---")

                export_format = gr.Radio(
                    choices=["DOCX", "PDF"],
                    value="PDF",
                    label=TRANSLATIONS[current_language]['export_format']
                )

                export_btn = gr.Button(
                    TRANSLATIONS[current_language]['export_button'],
                    variant="secondary"
                )

                # Hidden until handle_export() makes them visible.
                export_status = gr.Textbox(
                    label="Export Status",
                    interactive=False,
                    visible=False
                )

                export_file = gr.File(
                    label="Download Report",
                    visible=False
                )

            with gr.Column(scale=2):
                analysis_output = gr.Markdown(
                    label=TRANSLATIONS[current_language]['comparative_analysis']
                )

                code_output = gr.Code(
                    label=TRANSLATIONS[current_language]['implementation_code'],
                    language="python",
                    interactive=True,
                    lines=20
                )

        data_format_accordion = gr.Accordion(
            label=TRANSLATIONS[current_language]['data_format'],
            open=False
        )

        with data_format_accordion:
            gr.Markdown("""
            ### Expected CSV/Excel structure:

            | Experiment | Model | Type | R2 | RMSE | AIC | BIC | mu_max | Ks | Parameters |
            |------------|-------|------|-----|------|-----|-----|--------|-------|------------|
            | pH_7.0 | Monod | Biomass | 0.985 | 0.023 | -45.2 | -42.1 | 0.45 | 2.1 | {...} |
            | pH_7.0 | Logistic | Biomass | 0.976 | 0.031 | -42.1 | -39.5 | 0.42 | - | {...} |
            | pH_7.0 | First_Order | Substrate | 0.992 | 0.018 | -48.5 | -45.2 | - | 1.8 | {...} |
            | pH_7.5 | Monod | Biomass | 0.978 | 0.027 | -44.1 | -41.2 | 0.43 | 2.2 | {...} |

            **Important columns:**
            - **Experiment**: Experimental condition identifier
            - **Model**: Model name
            - **Type**: Variable type (Biomass/Substrate/Product)
            - **R2, RMSE**: Fit quality metrics
            - **Parameters**: Model-specific parameters
            """)

        # Define examples
        # NOTE(review): the example paths are relative; confirm the files ship with the app.
        examples = gr.Examples(
            examples=[
                [["examples/biomass_models_comparison.csv"], "claude-3-5-sonnet-20241022", "detailed", ""],
                [["examples/substrate_kinetics_results.xlsx"], "claude-3-5-sonnet-20241022", "summarized", "Focus on temperature effects"]
            ],
            inputs=[files_input, model_selector, detail_level, additional_specs],
            label=TRANSLATIONS[current_language]['examples']
        )

        # Events - updated to include additional_specs
        # The outputs order must match the list returned by update_interface_language().
        language_selector.change(
            update_interface_language,
            inputs=[language_selector],
            outputs=[
                title_text, subtitle_text, files_input, model_selector,
                language_selector, theme_selector, detail_level, additional_specs,
                analyze_btn, export_format, export_btn, analysis_output,
                code_output, data_format_accordion
            ]
        )

        def change_theme(theme_name):
            """Show an informational message; the selected theme is not applied here."""
            # Note: in current Gradio, changing the theme dynamically requires a reload.
            # This is a known limitation.
            return gr.Info("Theme will be applied on next page load")

        theme_selector.change(
            change_theme,
            inputs=[theme_selector],
            outputs=[]
        )

        analyze_btn.click(
            fn=process_and_store,
            inputs=[files_input, model_selector, detail_level, language_selector, additional_specs],
            outputs=[analysis_output, code_output]
        )

        def handle_export(format, language):
            """Call export_report() and reveal the status box, plus the download link on success."""
            status, file = export_report(format, language)
            if file:
                return gr.update(value=status, visible=True), gr.update(value=file, visible=True)
            else:
                # No file was produced: show the status message and keep the download hidden.
                return gr.update(value=status, visible=True), gr.update(visible=False)

        export_btn.click(
            fn=handle_export,
            inputs=[export_format, language_selector],
            outputs=[export_status, export_file]
        )

    return demo
# Main entry point
def main():
    """Return the full app, or a minimal error interface when no API key is set.

    Returns:
        The result of ``create_interface()`` when ``ANTHROPIC_API_KEY`` is set
        to a non-empty value. Otherwise a ``gr.Interface`` whose handler
        returns the English "no API key" message.
    """
    if os.getenv("ANTHROPIC_API_KEY"):
        return create_interface()

    # No key available: warn on the console and serve a placeholder screen.
    print("⚠️ Configure ANTHROPIC_API_KEY in HuggingFace Space secrets")
    fallback_options = {
        "fn": lambda x: TRANSLATIONS['en']['error_no_api'],
        "inputs": gr.Textbox(),
        "outputs": gr.Textbox(),
        "title": "Configuration Error",
    }
    return gr.Interface(**fallback_options)
# Local execution entry point
if __name__ == "__main__":
    demo = main()
    if demo:
        # Listen on all interfaces at port 7860, without a public share link.
        launch_options = {
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": False,
        }
        demo.launch(**launch_options)