# Bello-Silva / app.py
# jfforero's picture
# Update app.py
# 00f78a3 verified
"""
BELLO - Entornos Virtuales Afectivos (FULLY FIXED)
Working image generation + Enhanced 360° viewer with play/pause, continuous/random, and chunk dropdown
"""
import os
import math
import struct
import tempfile
import json
import base64
import warnings
import subprocess
import sys
# Suppress TensorFlow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
warnings.filterwarnings('ignore', category=UserWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)
try:
from pydub import AudioSegment
except ImportError:
print("Installing pydub...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "pydub"])
from pydub import AudioSegment
import numpy as np
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
import cv2
# Check for ffmpeg
# The probe only verifies that an `ffmpeg` executable can be started. A missing
# binary raises OSError (FileNotFoundError); a binary that exits non-zero raises
# CalledProcessError because of check=True. Both are reported instead of
# aborting start-up. The `ffmpeg-python` pip package is only a wrapper and does
# not ship the executable, so installing it cannot repair a missing ffmpeg.
try:
    subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
except (OSError, subprocess.CalledProcessError) as ffmpeg_error:
    print(f"⚠️ FFmpeg not found or not working ({ffmpeg_error}).")
    print("   Install the ffmpeg binary with your system package manager "
          "(e.g. apt-get install ffmpeg); some audio formats may fail to load.")
try:
import torch
except ImportError:
print("Installing torch...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "torch"])
import torch
try:
import scipy.io.wavfile
except ImportError:
print("Installing scipy...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "scipy"])
import scipy.io.wavfile
try:
import requests
except ImportError:
print("Installing requests...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "requests"])
import requests
try:
from textblob import TextBlob
except ImportError:
print("Installing textblob...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "textblob"])
from textblob import TextBlob
try:
import librosa
except ImportError:
print("Installing librosa...")
subprocess.check_call([sys.executable, "-m", "pip", "install", "librosa"])
import librosa
# =========================
# Model Loading with Fallbacks
# =========================
# Module-level handles for the optional models. Each starts as None and is
# reassigned by the loading code further down this file; the processing
# functions test `model`, `model2`, `processor` and `music_model` for None and
# return a placeholder result when the corresponding model is unavailable.
model = None  # speech-emotion model returned by load_emotion_model()
model2 = None  # faster-whisper WhisperModel used by transcribe()
processor = None  # MusicGen processor returned by load_musicgen_model()
music_model = None  # MusicGen model returned by load_musicgen_model()
device = None  # "cuda" or "cpu", chosen inside load_musicgen_model()
def load_emotion_model(model_path):
    """Load the Keras emotion model stored at ``model_path``.

    TensorFlow is imported lazily inside the function so that a missing or
    broken installation is reported here instead of failing at module import.

    Args:
        model_path: Path of the saved Keras model file.

    Returns:
        The loaded model, or ``None`` when the file does not exist or when
        importing/loading raises any exception (the reason is printed).
    """
    try:
        from tensorflow.keras.models import load_model

        # Guard clause: nothing to load if the file is absent.
        if not os.path.exists(model_path):
            print(f"⚠️ Emotion model not found: {model_path}")
            return None

        loaded_model = load_model(model_path)
        print(f"✅ Emotion model loaded: {model_path}")
        return loaded_model
    except Exception as load_error:
        print(f"⚠️ Error loading emotion model: {load_error}")
        return None
# Load the speech-emotion model from a path relative to the working directory.
# `model` stays None when load_emotion_model() cannot load it.
model_path = "mymodel_SER_LSTM_RAVDESS.h5"
model = load_emotion_model(model_path)
# Try to load Whisper
# faster_whisper is imported inside the try so that a missing package (or any
# error while constructing the model) only disables transcription: `model2`
# is left as None and transcribe() returns a placeholder string.
try:
    from faster_whisper import WhisperModel
    print("📥 Loading Whisper model...")
    model2 = WhisperModel("small", device="cpu", compute_type="int8")
    print("✅ Whisper model loaded")
except Exception as e:
    print(f"⚠️ Whisper model not available: {e}")
    model2 = None
# Try to load MusicGen
def load_musicgen_model():
    """Load the MusicGen processor and model ("facebook/musicgen-small").

    Returns:
        A ``(processor, model, device)`` tuple, where ``device`` is ``"cuda"``
        when ``torch.cuda.is_available()`` and ``"cpu"`` otherwise. Returns
        ``(None, None, None)`` when ``transformers`` cannot be imported or
        when loading the model raises; the reason is printed.
    """
    unavailable = (None, None, None)

    # Stage 1: the optional dependency itself.
    try:
        print("📥 Loading MusicGen model...")
        from transformers import AutoProcessor, MusicgenForConditionalGeneration
    except ImportError as e:
        print(f"⚠️ Transformers not installed: {e}")
        return unavailable
    except Exception as e:
        print(f"⚠️ MusicGen model not available: {e}")
        return unavailable

    # Stage 2: fetch the weights and move the model to the chosen device.
    try:
        target_device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {target_device}")
        text_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
        generator = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
        generator.to(target_device)
        # The model is only used for generation, so switch it to eval mode.
        generator.eval()
        print("✅ MusicGen model loaded")
        return text_processor, generator, target_device
    except Exception as load_error:
        print(f"⚠️ Error during MusicGen loading: {load_error}")
        return unavailable
# Load MusicGen once at import time. All three names are None when
# load_musicgen_model() could not load it; generate_music() checks for that.
processor, music_model, device = load_musicgen_model()
# =========================
# Audio Processing
# =========================
def extract_mfcc(wav_filepath):
    """Compute a mean MFCC feature vector for an audio file.

    The file is loaded at 22050 Hz, 40 MFCCs are computed, and the matrix is
    transposed and averaged along axis 0 so a single 1-D vector is returned
    (predict_emotion_from_audio reshapes it to ``(1, 40, 1)``).

    Args:
        wav_filepath: Path of the audio file to analyse.

    Returns:
        The averaged MFCC vector as a NumPy array, or ``None`` when loading or
        feature extraction raises (the error is printed).
    """
    try:
        signal, sample_rate = librosa.load(wav_filepath, sr=22050)
        coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
        return np.mean(coefficients.T, axis=0)
    except Exception as mfcc_error:
        print(f"⚠️ MFCC extraction failed: {mfcc_error}")
        return None
def transcribe(audio_path):
    """Transcribe an audio file with the module-level Whisper model.

    Transcription is requested with ``language="es"``. The text of all
    returned segments is concatenated without a separator.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcription, or a bracketed placeholder string when the model
        is not loaded, no text was produced, or transcription raised.
    """
    try:
        if model2 is None:
            return "[Transcripción no disponible]"

        segments, _info = model2.transcribe(audio_path, language="es")
        pieces = []
        for segment in segments:
            pieces.append(segment.text)
        transcript = "".join(pieces)

        if transcript:
            return transcript
        return "[Sin habla detectada]"
    except Exception as transcription_error:
        print(f"⚠️ Transcription error: {transcription_error}")
        return "[Error en transcripción]"
def chunk_audio_with_overlap(audio_path, chunk_duration=10, overlap_percent=20):
    """Split an audio file into fixed-length, overlapping WAV chunks.

    Consecutive chunks start ``chunk_duration * (1 - overlap_percent / 100)``
    seconds apart, so each chunk repeats the tail of the previous one. Every
    chunk is exported to its own temporary ``.wav`` file, which the caller is
    responsible for deleting.

    Args:
        audio_path: Path of the audio file to split.
        chunk_duration: Chunk length in seconds; must be positive.
        overlap_percent: Percentage of a chunk shared with the next chunk.
            When the resulting step is not positive (100 % or more), only a
            single chunk starting at 0 is produced.

    Returns:
        ``(chunks, count)`` where ``chunks`` is a list of dicts with keys
        ``path``, ``start_ms``, ``end_ms``, ``original_index`` and
        ``overlap_ms``. If the audio is shorter than one chunk, or chunking
        fails but the file is readable, a single entry pointing at
        ``audio_path`` itself is returned. If the file cannot be read at all
        the result is ``([], 0)``.
    """
    written_paths = []  # temp files created so far, removed again on failure
    try:
        # Validate before doing any work with the value.
        if chunk_duration <= 0:
            raise ValueError("Chunk duration must be positive")

        audio = AudioSegment.from_file(audio_path)
        duration_ms = len(audio)
        chunk_ms = chunk_duration * 1000
        overlap_ms = int(chunk_ms * (overlap_percent / 100.0))
        step_ms = chunk_ms - overlap_ms

        # Audio shorter than one chunk: hand back the original file untouched.
        if chunk_duration > duration_ms / 1000:
            return [{"path": audio_path, "start_ms": 0, "end_ms": duration_ms,
                     "original_index": 0, "overlap_ms": 0}], 1

        num_chunks = math.ceil((duration_ms - overlap_ms) / step_ms) if step_ms > 0 else 1

        chunk_files = []
        for i in range(num_chunks):
            start_ms = i * step_ms
            if start_ms >= duration_ms:
                break
            end_ms = min(start_ms + chunk_ms, duration_ms)

            # Reserve a unique name, close the handle, then let pydub write
            # to the path (writing while the handle is open fails on Windows).
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
                chunk_path = tmp_file.name
            written_paths.append(chunk_path)
            audio[start_ms:end_ms].export(chunk_path, format="wav")

            chunk_files.append({
                "path": chunk_path,
                "start_ms": start_ms,
                "end_ms": end_ms,
                "original_index": i,
                "overlap_ms": overlap_ms if i > 0 else 0,
            })
        return chunk_files, len(chunk_files)
    except Exception as e:
        print(f"⚠️ Audio chunking error: {e}")
        # Do not leave partially written chunks behind.
        for leftover in written_paths:
            try:
                os.unlink(leftover)
            except OSError:
                pass  # best-effort cleanup; the file may never have been created
        # Fall back to treating the whole file as one chunk, if it is readable.
        try:
            audio_len = len(AudioSegment.from_file(audio_path))
        except Exception as fallback_error:
            print(f"⚠️ Audio fallback failed: {fallback_error}")
            return [], 0
        return [{"path": audio_path, "start_ms": 0, "end_ms": audio_len,
                 "original_index": 0, "overlap_ms": 0}], 1
# =========================
# Emotion & Sentiment Analysis
# =========================
# Maps the class index chosen by np.argmax over the emotion model's output
# (see predict_emotion_from_audio) to a label. The labels are also the keys of
# the prompt table in generate_music.
# NOTE(review): the index order presumably matches the labels the model was
# trained with (file name mentions RAVDESS) — confirm against the training code.
emotions = {
0: "neutral", 1: "calm", 2: "happy", 3: "sad",
4: "angry", 5: "fearful", 6: "disgust", 7: "surprised",
}
def predict_emotion_from_audio(wav_filepath):
    """Predict the emotion label of an audio file.

    Args:
        wav_filepath: Path of the audio file to classify.

    Returns:
        One of the labels in ``emotions``. ``"neutral"`` is returned when the
        model is not loaded, MFCC extraction fails, the predicted index is
        unknown, or prediction raises (the error is printed).
    """
    try:
        if model is None:
            return "neutral"

        features = extract_mfcc(wav_filepath)
        if features is None:
            return "neutral"

        # Shape is passed positionally: the `newshape=` keyword of np.reshape
        # is deprecated in recent NumPy releases, the positional form works
        # on every version.
        batch = np.reshape(features, (1, 40, 1))
        predictions = model.predict(batch, verbose=0)
        # Plain int keeps the dict lookup independent of NumPy scalar types.
        predicted_class = int(np.argmax(predictions[0]))
        return emotions.get(predicted_class, "neutral")
    except Exception as e:
        print(f"⚠️ Emotion prediction error: {e}")
        return "neutral"
def analyze_sentiment(text):
    """Classify text as positive, negative or neutral using TextBlob polarity.

    Empty/blank text and bracketed placeholders (strings starting with "[",
    as produced by transcribe on failure) are treated as neutral without
    running the analyser.

    NOTE(review): transcribe() requests Spanish (language="es"); TextBlob's
    default analyser is presumably English-oriented — confirm that polarity
    is meaningful for Spanish input.

    Args:
        text: Text to analyse; may be empty or ``None``.

    Returns:
        ``(label, polarity)`` where ``label`` is ``"positive"`` for polarity
        above 0.1, ``"negative"`` below -0.1 and ``"neutral"`` otherwise.
        ``("neutral", 0.0)`` is returned for skipped input or on error.
    """
    try:
        nothing_to_analyse = not text or text.strip() == "" or text.startswith("[")
        if nothing_to_analyse:
            return "neutral", 0.0

        polarity = TextBlob(text).sentiment.polarity
        if polarity > 0.1:
            label = "positive"
        elif polarity < -0.1:
            label = "negative"
        else:
            label = "neutral"
        return label, polarity
    except Exception as sentiment_error:
        print(f"⚠️ Sentiment analysis error: {sentiment_error}")
        return "neutral", 0.0
# =========================
# Image Generation (Fixed - Placeholder based on Sentiment)
# =========================
def _draw_centered_text(draw, text, y, font, fill, canvas_width=1024):
    """Draw ``text`` horizontally centred at height ``y``; return its x offset."""
    left, _top, right, _bottom = draw.textbbox((0, 0), text, font=font)
    x = (canvas_width - (right - left)) // 2
    draw.text((x, y), text, fill=fill, font=font)
    return x


def generate_image(sentiment_prediction, transcribed_text, chunk_idx, total_chunks):
    """
    Generate a 360° panorama placeholder image based on sentiment.

    Creates a solid-colour 1024x512 (2:1, equirectangular) RGB image whose
    colour depends on the sentiment, with a "Segment N - SENTIMENT" caption
    and, when available, the first 60 characters of the transcription.

    Args:
        sentiment_prediction: "positive", "negative" or "neutral" (case
            insensitive); any other value uses the neutral colour.
        transcribed_text: Transcription used for the preview line. Skipped
            when empty or when it starts with "[" (placeholder strings).
        chunk_idx: Zero-based segment index, shown one-based in the caption.
        total_chunks: Accepted for interface compatibility; not used.

    Returns:
        A PIL ``Image``. Text-overlay failures are logged and ignored; any
        other failure yields a plain neutral-coloured fallback image.
    """
    try:
        print(f"🎨 Generating image for segment {chunk_idx + 1}, sentiment: {sentiment_prediction}")
        # Color mapping based on sentiment
        color_map = {
            "positive": (100, 200, 100),  # Green (RGB)
            "negative": (200, 100, 100),  # Red (RGB)
            "neutral": (100, 150, 200),  # Blue (RGB)
        }
        # Unknown sentiments fall back to the neutral colour.
        color = color_map.get(sentiment_prediction.lower(), (100, 150, 200))
        print(f" Using color: RGB{color}")
        # Create equirectangular image (2:1 aspect ratio for 360° panorama)
        print(f" Creating image: 1024x512")
        image = Image.new("RGB", (1024, 512), color=color)
        print(f" Image created successfully: {image.size}")

        # The overlay is decorative: if it fails the coloured image is still returned.
        try:
            draw = ImageDraw.Draw(image)
            # Prefer DejaVu Sans; fall back to PIL's built-in font when the
            # file is missing or FreeType support is unavailable.
            try:
                font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 20)
            except (OSError, ImportError):
                font = ImageFont.load_default()

            text = f"Segment {chunk_idx + 1} - {sentiment_prediction.upper()}"
            print(f" Adding text: {text}")
            text_y = 240
            text_x = _draw_centered_text(draw, text, text_y, font, (255, 255, 255))
            print(f" Text added at position ({text_x}, {text_y})")

            # Add transcription preview (first 60 chars)
            if transcribed_text and not transcribed_text.startswith("["):
                preview = transcribed_text[:60] + "..." if len(transcribed_text) > 60 else transcribed_text
                _draw_centered_text(draw, preview, 280, font, (200, 200, 200))
                print(f" Preview added: {preview[:40]}...")
        except Exception as text_error:
            print(f"⚠️ Text overlay error (non-critical): {text_error}")

        print(f"✅ Image generated successfully for segment {chunk_idx + 1}")
        return image
    except Exception as e:
        print(f"❌ Image generation error: {e}")
        import traceback
        traceback.print_exc()
        # Return fallback image
        fallback = Image.new("RGB", (1024, 512), color=(100, 150, 200))
        print(f" Using fallback image")
        return fallback
def create_xmp_block(width, height):
    """Build the XMP packet text that marks an image as a 360° panorama.

    The packet carries GPano attributes declaring an equirectangular
    projection whose full and cropped sizes both equal ``width`` x ``height``
    with a zero crop offset.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        The XMP packet as a string (lines joined with "\\n", no trailing newline).
    """
    packet_lines = [
        '<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>',
        '<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="ExifTool">',
        '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
        '<rdf:Description rdf:about=""',
        'xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"',
        'GPano:ProjectionType="equirectangular"',
        'GPano:UsePanoramaViewer="True"',
        f'GPano:FullPanoWidthPixels="{width}"',
        f'GPano:FullPanoHeightPixels="{height}"',
        f'GPano:CroppedAreaImageWidthPixels="{width}"',
        f'GPano:CroppedAreaImageHeightPixels="{height}"',
        'GPano:CroppedAreaLeftPixels="0"',
        'GPano:CroppedAreaTopPixels="0"/>',
        '</rdf:RDF>',
        '</x:xmpmeta>',
        '<?xpacket end="w"?>',
    ]
    return "\n".join(packet_lines)
def write_xmp_to_jpg(input_path, output_path, width, height):
    """Inject XMP 360° metadata into a JPEG file.

    The XMP packet from ``create_xmp_block`` is wrapped in an APP1 segment
    (marker FF E1, 2-byte big-endian length, XMP namespace header, payload).
    When the file starts with a JFIF APP0 segment the new segment is placed
    right after it, because JFIF expects APP0 to follow the SOI marker
    directly; otherwise it is placed right after SOI.

    ``input_path`` and ``output_path`` may be the same file: the input is read
    completely before the output is opened.

    Args:
        input_path: Path of the source JPEG.
        output_path: Path the tagged JPEG is written to.
        width: Panorama width in pixels recorded in the metadata.
        height: Panorama height in pixels recorded in the metadata.

    Returns:
        ``True`` when the file was written, ``False`` when anything failed.
        Failures are printed, never raised, and leave ``output_path`` untouched
        unless the final write itself failed.
    """
    try:
        with open(input_path, "rb") as f:
            data = f.read()
        if data[0:2] != b"\xFF\xD8":
            raise ValueError("Not a valid JPEG file")

        xmp_header = b"http://ns.adobe.com/xap/1.0/\x00"
        payload = xmp_header + create_xmp_block(width, height).encode("utf-8")
        # The length field counts itself (2 bytes) plus the payload and is 16 bit.
        segment_length = len(payload) + 2
        if segment_length > 0xFFFF:
            raise ValueError("XMP packet too large for a single APP1 segment")
        xmp_segment = b"\xFF\xE1" + struct.pack(">H", segment_length) + payload

        # Default: directly after SOI. Skip over a leading APP0 (FF E0) segment
        # so that it stays the first segment of the file.
        insert_at = 2
        if data[2:4] == b"\xFF\xE0" and len(data) >= 6:
            (app0_length,) = struct.unpack(">H", data[4:6])
            if app0_length >= 2 and 4 + app0_length <= len(data):
                insert_at = 4 + app0_length

        with open(output_path, "wb") as f:
            f.write(data[:insert_at] + xmp_segment + data[insert_at:])
        return True
    except Exception as e:
        print(f"⚠️ XMP write error: {e}")
        return False
def add_360_metadata(img):
    """Save an image as a temporary JPEG and tag it as a 360° panorama.

    The image is converted to RGB if necessary, written as a quality-95 JPEG
    to a new temporary file (which is not deleted automatically), and
    ``write_xmp_to_jpg`` is asked to inject the panorama metadata in place.

    Args:
        img: PIL image to save; ``None`` is rejected without touching disk.

    Returns:
        Path of the saved JPEG, or ``None`` when ``img`` is ``None`` or the
        image could not be saved at all. A failed metadata injection is not
        an error: the untagged JPEG path is still returned.
    """
    if img is None:
        # Nothing can be saved, so do not create (and leak) a temp file.
        print("❌ Metadata error: Input image is None")
        return None

    tmp_path = None
    try:
        print(f" Saving image with metadata...")
        # Reserve a unique file name; the handle is closed before PIL writes.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
            tmp_path = tmp_file.name
        print(f" Temp file: {tmp_path}")
        # JPEG cannot store alpha/palette modes.
        if img.mode != 'RGB':
            print(f" Converting from {img.mode} to RGB")
            img = img.convert('RGB')
        img.save(tmp_path, "JPEG", quality=95)
        print(f" JPEG saved: {tmp_path}")
        # Inject XMP metadata (best effort). write_xmp_to_jpg prints its own
        # errors, so the message below only means the call returned.
        try:
            write_xmp_to_jpg(tmp_path, tmp_path, img.width, img.height)
            print(f" XMP metadata injected")
        except Exception as xmp_error:
            print(f" ⚠️ XMP injection failed (non-critical): {xmp_error}")
        print(f"✅ Image saved: {tmp_path}")
        return tmp_path
    except Exception as e:
        print(f"❌ Metadata error: {e}")
        import traceback
        traceback.print_exc()
        # Fallback: save without metadata, reusing the temp file that was
        # already reserved instead of creating (and leaking) a second one.
        try:
            if tmp_path is None:
                with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
                    tmp_path = tmp_file.name
            if img.mode != 'RGB':
                img = img.convert('RGB')
            img.save(tmp_path, "JPEG", quality=95)
            print(f"⚠️ Saved without metadata: {tmp_path}")
            return tmp_path
        except Exception as fallback_error:
            print(f"❌ Fallback also failed: {fallback_error}")
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass  # best-effort removal of the unusable temp file
            return None
# =========================
# Music Generation
# =========================
def generate_music(transcribed_text, emotion_prediction, chunk_idx, total_chunks):
    """Generate a short music clip for an emotion with MusicGen.

    The emotion label selects a fixed text prompt; MusicGen generates up to
    256 new tokens, the first channel of the first result is peak-normalised
    and written to a temporary ``.wav`` file (not deleted automatically).

    Args:
        transcribed_text: Accepted for interface compatibility; not used.
        emotion_prediction: Emotion label (case insensitive); unknown labels
            use a generic ambient prompt.
        chunk_idx: Zero-based segment index, used only in the log message.
        total_chunks: Accepted for interface compatibility; not used.

    Returns:
        Path of the generated WAV file, or ``None`` when MusicGen is not
        loaded or generation fails (the reason is printed).
    """
    try:
        if processor is None or music_model is None:
            print("⚠️ MusicGen not available, skipping music generation")
            return None

        prompt_by_emotion = {
            "calm": "calm relaxing ambient music",
            "happy": "uplifting energetic joyful music",
            "sad": "melancholic emotional sad music",
            "angry": "intense dramatic aggressive music",
            "fearful": "eerie suspenseful music",
            "disgust": "dark unsettling music",
            "surprised": "playful expressive music",
            "neutral": "ambient peaceful background music"
        }
        prompt = prompt_by_emotion.get(emotion_prediction.lower(), "ambient background music")

        try:
            with torch.no_grad():
                model_inputs = processor(text=[prompt], padding=True, return_tensors="pt").to(device)
                generated = music_model.generate(**model_inputs, max_new_tokens=256)

            sampling_rate = music_model.config.audio_encoder.sampling_rate
            waveform = generated[0, 0].cpu().numpy()
            # Scale to a peak of 1; the floor avoids dividing by zero on silence.
            peak = np.max(np.abs(waveform))
            waveform = waveform / max(1e-9, peak)

            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
                output_path = tmp_file.name
                scipy.io.wavfile.write(output_path, rate=sampling_rate, data=waveform)
            print(f"✅ Music generated for segment {chunk_idx + 1}")
            return output_path
        except RuntimeError as runtime_error:
            print(f"⚠️ Music generation runtime error: {runtime_error}")
            print(" Skipping music for this segment")
            return None
    except Exception as generation_error:
        print(f"⚠️ Music generation error: {generation_error}")
        return None
# =========================
# Chunk Processing
# =========================
def process_chunk(chunk_info, generate_audio=True):
    """Run the full analysis pipeline on one audio chunk.

    Steps: emotion prediction, transcription, sentiment analysis, placeholder
    panorama generation, saving the panorama with 360° metadata and,
    optionally, music generation.

    Args:
        chunk_info: Dict with at least ``path`` (audio file of the chunk) and
            ``original_index`` (zero-based chunk index).
        generate_audio: When true, also generate music for the chunk.

    Returns:
        Dict with keys ``chunk_index`` (one-based), ``emotion``,
        ``transcription`` (at most 100 characters), ``sentiment``, ``image``
        (PIL image), ``image_360`` (JPEG path or ``None``) and ``music`` (WAV
        path or ``None``). On any failure an error record is returned instead:
        ``emotion``/``sentiment`` are ``"error"``, ``transcription`` holds the
        error text and ``image`` is a grey placeholder.
    """
    # Bound before the try so the error handler can always report an index,
    # even when chunk_info itself is malformed.
    chunk_idx = 0
    try:
        chunk_path = chunk_info["path"]
        chunk_idx = chunk_info["original_index"]
        print(f"\n📋 ====== Processing Chunk {chunk_idx + 1} ======")

        print(f"🎯 Step 1: Emotion prediction...")
        emotion_prediction = predict_emotion_from_audio(chunk_path)
        print(f" ✓ Emotion: {emotion_prediction}")

        print(f"🎯 Step 2: Transcription...")
        transcribed_text = transcribe(chunk_path)
        if len(transcribed_text) > 60:
            print(f" ✓ Text: {transcribed_text[:60]}...")
        else:
            print(f" ✓ Text: {transcribed_text}")

        print(f"🎯 Step 3: Sentiment analysis...")
        sentiment, polarity = analyze_sentiment(transcribed_text)
        print(f" ✓ Sentiment: {sentiment} (polarity: {polarity:.2f})")

        print(f"🎯 Step 4: Image generation...")
        # total_chunks is not known here; generate_image ignores it.
        image = generate_image(sentiment, transcribed_text, chunk_idx, -1)
        if image is None:
            raise ValueError("Image generation returned None")
        print(f" ✓ Image: {image.size} {image.mode}")

        print(f"🎯 Step 5: Adding 360° metadata...")
        image_360_path = add_360_metadata(image)
        print(f" ✓ Path: {image_360_path}")

        music_path = None
        if generate_audio:
            print(f"🎯 Step 6: Music generation...")
            music_path = generate_music(transcribed_text, emotion_prediction, chunk_idx, -1)
            if music_path:
                print(f" ✓ Music: {music_path}")
            else:
                print(f" ⚠️ Music skipped")

        result = {
            "chunk_index": chunk_idx + 1,
            "emotion": emotion_prediction,
            "transcription": transcribed_text[:100] if transcribed_text else "N/A",
            "sentiment": sentiment,
            "image": image,
            "image_360": image_360_path,
            "music": music_path,
        }
        print(f"✅ Chunk {chunk_idx + 1} processed successfully")
        return result
    except Exception as e:
        print(f"\n❌ ERROR processing chunk {chunk_idx + 1}: {e}")
        import traceback
        traceback.print_exc()
        return {
            "chunk_index": chunk_idx + 1,
            "emotion": "error",
            "transcription": str(e),
            "sentiment": "error",
            "image": Image.new("RGB", (1024, 512), color=(100, 100, 100)),
            "image_360": None,
            "music": None,
        }
def get_predictions(audio_input, generate_audio=True, chunk_duration=10):
    """Split an audio file into chunks and process each one.

    The audio is chunked with a fixed 20 % overlap and every chunk is passed
    to ``process_chunk``. Temporary chunk files are removed afterwards, even
    when processing raises; ``audio_input`` itself is never deleted.

    Args:
        audio_input: Path of the audio file to analyse.
        generate_audio: Forwarded to ``process_chunk``.
        chunk_duration: Chunk length in seconds.

    Returns:
        List of result dicts from ``process_chunk`` in chunk order, or an
        empty list when chunking produced nothing or an error occurred.
    """
    chunk_infos = []
    try:
        chunk_infos, total_chunks = chunk_audio_with_overlap(
            audio_input, chunk_duration, overlap_percent=20
        )
        results = []
        for position, chunk_info in enumerate(chunk_infos, start=1):
            print(f"⏳ Processing chunk {position}/{total_chunks}")
            results.append(process_chunk(chunk_info, generate_audio))
        return results
    except Exception as e:
        print(f"⚠️ Prediction error: {e}")
        return []
    finally:
        # Cleanup temp files; the chunker may hand back the original file as
        # the only "chunk", which must be kept.
        for chunk_info in chunk_infos:
            chunk_path = chunk_info.get("path")
            if not chunk_path or chunk_path == audio_input:
                continue
            try:
                os.unlink(chunk_path)
            except OSError as cleanup_error:
                print(f"⚠️ Could not remove temp chunk {chunk_path}: {cleanup_error}")
# =========================
# 360 Viewer - ENHANCED
# =========================
def _file_to_data_uri(path, mime_type, label):
    """Return the file at ``path`` as a base64 ``data:`` URI, or None on failure."""
    try:
        with open(path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")
        return f"data:{mime_type};base64,{encoded}"
    except (OSError, TypeError, ValueError) as e:
        print(f"⚠️ {label} encoding error: {e}")
        return None


def create_360_viewer_html(image_paths, audio_paths, output_path):
    """Create enhanced 360 viewer with play/pause, continuous/random, chunk dropdown

    Writes a self-contained HTML page: every panorama and audio clip is
    embedded as a base64 data URI, and the page loads the Pannellum viewer
    from the jsDelivr CDN. Entry ``i`` of ``audio_paths`` is played with entry
    ``i`` of ``image_paths``.

    NOTE(review): in the embedded script the pause handler only clears
    ``state.isPlaying``; the clip that is currently playing is not stopped —
    confirm whether that is intended.

    Args:
        image_paths: Paths of the panorama JPEGs, one per segment. A file
            that cannot be read is embedded as ``null``.
        audio_paths: Paths of the WAV clips; falsy entries and unreadable
            files are embedded as ``null`` (segment shown without audio).
        output_path: Path the HTML file is written to (UTF-8).

    Returns:
        ``output_path``.
    """
    image_data_list = [
        _file_to_data_uri(img_path, "image/jpeg", "Image") for img_path in image_paths
    ]
    audio_data_list = [
        _file_to_data_uri(audio_path, "audio/wav", "Audio") if audio_path else None
        for audio_path in audio_paths
    ]

    # Pre-render the pieces interpolated into the template below.
    options_html = "".join(
        f'<option value="{i}">Segmento {i+1}</option>' for i in range(len(image_data_list))
    )
    images_json = json.dumps(image_data_list)
    audio_json = json.dumps(audio_data_list)

    # Template lines are kept flush-left: they are part of the emitted file.
    html_content = f"""
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>EVA 360 - Visualizador Afectivo</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.css"/>
<style>
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{
overflow: hidden;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background: #0a0e27;
color: #e0e0e0;
}}
#panorama {{
width: 100vw;
height: calc(100vh - 120px);
}}
#controls {{
position: fixed;
bottom: 0;
left: 0;
right: 0;
height: 120px;
background: linear-gradient(to top, rgba(15, 25, 55, 0.98), rgba(15, 25, 55, 0.90));
border-top: 2px solid rgba(100, 200, 255, 0.3);
padding: 15px 20px;
display: flex;
gap: 20px;
align-items: center;
justify-content: flex-start;
z-index: 1000;
flex-wrap: wrap;
}}
.control-group {{
display: flex;
align-items: center;
gap: 10px;
}}
.control-label {{
color: #80b0ff;
font-weight: 600;
font-size: 12px;
text-transform: uppercase;
margin-right: 5px;
}}
select, input[type="range"] {{
padding: 8px 12px;
background: rgba(20, 40, 70, 0.9);
border: 1px solid rgba(150, 200, 255, 0.3);
color: #e0e0e0;
border-radius: 6px;
font-size: 13px;
cursor: pointer;
}}
select:focus, input[type="range"]:focus {{
outline: none;
border-color: rgba(100, 180, 255, 0.6);
background: rgba(30, 50, 80, 0.95);
}}
button {{
padding: 10px 16px;
background: rgba(80, 150, 200, 0.6);
border: 1px solid rgba(150, 200, 255, 0.3);
color: #e0e0e0;
border-radius: 6px;
cursor: pointer;
transition: all 0.3s ease;
font-weight: 600;
font-size: 13px;
}}
button:hover {{
background: rgba(100, 180, 255, 0.8);
border-color: rgba(150, 200, 255, 0.6);
}}
button.active {{
background: #2ecc71;
border-color: #2ecc71;
color: #fff;
}}
.toggle-group {{
display: flex;
gap: 5px;
}}
.toggle-group button {{
padding: 8px 12px;
font-size: 12px;
}}
#chunk-selector {{
min-width: 150px;
}}
</style>
</head>
<body>
<div id="panorama"></div>
<div id="controls">
<div class="control-group">
<button id="playBtn" class="active">▶ Reproducir</button>
<button id="pauseBtn">⏸ Pausa</button>
</div>
<div class="control-group">
<div class="toggle-group">
<button id="continuousBtn" class="active" title="Reproducir secuencialmente">🔄 Continuo</button>
<button id="randomBtn" title="Reproducir en orden aleatorio">🔀 Aleatorio</button>
</div>
</div>
<div class="control-group">
<label class="control-label">Saltar a Segmento:</label>
<select id="chunk-selector">
{options_html}
</select>
</div>
<div class="control-group" style="margin-left: auto;">
<span id="status" style="font-size: 12px; color: #b0b0ff;">Parado</span>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/pannellum@2.5.6/build/pannellum.js"></script>
<script>
const images = {images_json};
const audioFiles = {audio_json};
let state = {{
currentIndex: 0,
mode: 'continuous',
isPlaying: false,
viewer: null,
sequence: []
}};
function loadPanorama(index) {{
if (state.viewer) {{
try {{ state.viewer.destroy(); }} catch(e) {{}}
}}
state.currentIndex = index;
document.getElementById('chunk-selector').value = index;
state.viewer = pannellum.viewer('panorama', {{
type: "equirectangular",
panorama: images[index],
autoLoad: true,
autoRotate: -2,
showZoomCtrl: true,
showFullscreenCtrl: true,
hfov: 100
}});
}}
async function playSequence() {{
state.isPlaying = true;
document.getElementById('playBtn').classList.add('active');
document.getElementById('pauseBtn').classList.remove('active');
document.getElementById('status').textContent = 'Reproduciendo...';
state.sequence = Array.from({{length: images.length}}, (_, i) => i);
if (state.mode === 'random') {{
// Fisher-Yates shuffle
for (let i = state.sequence.length - 1; i > 0; i--) {{
const j = Math.floor(Math.random() * (i + 1));
[state.sequence[i], state.sequence[j]] = [state.sequence[j], state.sequence[i]];
}}
}}
for (let idx of state.sequence) {{
if (!state.isPlaying) {{
document.getElementById('status').textContent = 'Pausado';
break;
}}
loadPanorama(idx);
if (audioFiles[idx]) {{
try {{
const audio = new Audio(audioFiles[idx]);
document.getElementById('status').textContent = `Reproduciendo segmento ${{idx + 1}}...`;
await new Promise(resolve => {{
audio.onended = resolve;
audio.play().catch(e => {{
console.log('Play error:', e);
setTimeout(resolve, 2000);
}});
}});
}} catch (e) {{
console.log('Audio error:', e);
await new Promise(r => setTimeout(r, 2000));
}}
}} else {{
document.getElementById('status').textContent = `Segmento ${{idx + 1}} (sin audio)`;
await new Promise(r => setTimeout(r, 2000));
}}
}}
if (state.isPlaying) {{
state.isPlaying = false;
document.getElementById('playBtn').classList.remove('active');
document.getElementById('status').textContent = 'Completado';
}}
}}
// Play button
document.getElementById('playBtn').addEventListener('click', function() {{
if (!state.isPlaying) {{
playSequence();
}}
}});
// Pause button
document.getElementById('pauseBtn').addEventListener('click', function() {{
state.isPlaying = false;
document.getElementById('playBtn').classList.remove('active');
document.getElementById('status').textContent = 'Pausado';
}});
// Mode toggles
document.getElementById('continuousBtn').addEventListener('click', function() {{
state.mode = 'continuous';
this.classList.add('active');
document.getElementById('randomBtn').classList.remove('active');
document.getElementById('status').textContent = 'Modo: Continuo';
}});
document.getElementById('randomBtn').addEventListener('click', function() {{
state.mode = 'random';
this.classList.add('active');
document.getElementById('continuousBtn').classList.remove('active');
document.getElementById('status').textContent = 'Modo: Aleatorio';
}});
// Chunk selector dropdown
document.getElementById('chunk-selector').addEventListener('change', function(e) {{
const selectedIndex = parseInt(e.target.value);
state.isPlaying = false;
loadPanorama(selectedIndex);
document.getElementById('status').textContent = `Segmento ${{selectedIndex + 1}}`;
}});
// Initialize
loadPanorama(0);
document.getElementById('status').textContent = 'Listo para reproducir';
</script>
</body>
</html>
"""
    # The page declares charset UTF-8 and contains emoji/accented text, so the
    # file must be written as UTF-8 regardless of the platform's default.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(html_content)
    return output_path
# =========================
# Gradio Interface
# =========================
def _normalize_chunk_duration(chunk_duration):
    """Coerce the UI value to a chunk length in seconds (default 10, max 120)."""
    if chunk_duration is None:
        return 10
    try:
        chunk_duration = float(chunk_duration)
    except (ValueError, TypeError):
        print(f"⚠️ Invalid chunk duration, using default: 10s")
        return 10
    if math.isnan(chunk_duration):
        # NaN compares false with everything and would slip through the range checks.
        print(f"⚠️ Invalid chunk duration, using default: 10s")
        return 10
    if chunk_duration < 1:
        print(f"⚠️ Chunk duration too small, using default: 10s")
        return 10
    if chunk_duration > 120:
        print(f"⚠️ Chunk duration too large, using maximum: 120s")
        return 120
    return chunk_duration


def process_audio(audio_input, generate_audio, chunk_duration):
    """Main processing function

    Runs the analysis pipeline on an audio file and prepares the three values
    shown in the interface.

    Args:
        audio_input: Path of the audio file; falsy means nothing was provided.
        generate_audio: Whether to generate music for each segment.
        chunk_duration: Requested segment length in seconds. ``None`` or
            unparsable values and values below 1 fall back to 10; values
            above 120 are capped at 120.

    Returns:
        ``(markdown, gallery_items, viewer_path)``: a Markdown report, a list
        of ``(image, caption)`` tuples, and the path of the generated HTML
        viewer (``None`` when no panorama could be saved or the viewer could
        not be written). On missing input or processing failure the report is
        an error message, the list is empty and the path is ``None``.
    """
    if not audio_input:
        return "❌ Por favor carga un archivo de audio", [], None

    # Validate and fix chunk duration
    chunk_duration = _normalize_chunk_duration(chunk_duration)
    print(f"\n🚀 Starting processing with {chunk_duration}s chunks...")

    results = get_predictions(audio_input, generate_audio, chunk_duration)
    if not results:
        return "❌ Error al procesar el audio", [], None

    # Build output markdown
    report_parts = [
        f"## ✅ Procesamiento Completado\n\n",
        f"**Total de segmentos:** {len(results)}\n\n",
    ]
    images_list = []
    for i, result in enumerate(results):
        report_parts.append(f"### 📊 Segmento {i+1}\n")
        report_parts.append(f"- **Emoción:** {result['emotion']}\n")
        report_parts.append(f"- **Transcripción:** {result['transcription']}\n")
        report_parts.append(f"- **Sentimiento:** {result['sentiment']}\n\n")
        if result['image']:
            images_list.append((result['image'], f"Seg {i+1}"))
    output_md = "".join(report_parts)

    # Create viewer. Image and music are filtered as pairs: the viewer matches
    # them by list index, so dropping only the image of a failed segment would
    # shift every later clip onto the wrong panorama.
    viewer_path = None
    viewer_items = [(r['image_360'], r['music']) for r in results if r['image_360']]
    if viewer_items:
        image_paths = [image_path for image_path, _ in viewer_items]
        audio_paths = [music_path for _, music_path in viewer_items]
        try:
            # Only reserve the name here; the viewer writer opens the path itself.
            with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp:
                html_path = tmp.name
            viewer_path = create_360_viewer_html(image_paths, audio_paths, html_path)
            print(f"✅ Viewer created: {viewer_path}")
        except Exception as e:
            print(f"⚠️ Error creating viewer: {e}")
            viewer_path = None
    return output_md, images_list, viewer_path
# Create interface
# Layout: audio input plus options, two action buttons, and two result tabs
# (report + gallery, and the downloadable HTML viewer). Everything below,
# including the event wiring, is declared inside the Blocks context.
with gr.Blocks(title="BELLO Seamless Enhanced") as demo:
    gr.Markdown("# 🌀 BELLO Seamless - Entornos Virtuales Afectivos")
    # NOTE(review): the help text below says 5-60 seconds while the Number
    # widget allows 2-120 and process_audio caps at 120 — confirm which
    # range is intended.
    gr.Markdown("""
**BELLO Seamless** explora las emociones a través de la voz y crea experiencias inmersivas en 360°.
### ¿Cómo usar?
1. 🎤 Carga un archivo de audio
2. ⚙️ Ajusta la duración de segmentos (5-60 segundos)
3. 🎵 Marca para generar música (opcional, más lento)
4. 🚀 Presiona "Generar EVA"
5. 📥 Descarga y abre el visualizador HTML en tu navegador
6. ▶️ Usa los controles: Play/Pause, Continuo/Aleatorio, Dropdown de segmentos
""")
    with gr.Row():
        with gr.Column(scale=2):
            # type="filepath": process_audio receives a path, not raw samples.
            audio = gr.Audio(label="🎤 Audio", type="filepath", sources=["upload", "microphone"])
        with gr.Column(scale=1):
            duration = gr.Number(label="⏱ Duración Segmento (s)", value=10, minimum=2, maximum=120, step=1)
            gen_music = gr.Checkbox(label="🎵 Generar Música", value=False)
    with gr.Row():
        process_btn = gr.Button("🚀 Generar EVA", variant="primary")
        clear_btn = gr.Button("🗑 Limpiar", variant="secondary")
    with gr.Tabs():
        with gr.TabItem("📊 Resultados"):
            output_text = gr.Markdown(value="*Resultados aparecerán aquí*")
            gallery = gr.Gallery(label="🖼 Imágenes", columns=2, rows=2)
        with gr.TabItem("🎬 Visualizador 360°"):
            gr.Markdown("### 📥 Descarga tu EVA aquí")
            gr.Markdown("Una vez que el procesamiento esté completo, haz clic en el botón de descarga para obtener el visualizador HTML interactivo.")
            viewer = gr.File(label="📥 Descargar Visualizador 360°", type="filepath")
    def clear():
        # One value per output of clear_btn.click: audio, report, gallery, viewer.
        return None, "❌ Borrado", [], None
    # The order of `inputs` must match process_audio's parameter order
    # (audio_input, generate_audio, chunk_duration).
    process_btn.click(
        fn=process_audio,
        inputs=[audio, gen_music, duration],
        outputs=[output_text, gallery, viewer]
    )
    clear_btn.click(
        fn=clear,
        outputs=[audio, output_text, gallery, viewer]
    )
# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    print("\n" + "="*60)
    print("🌀 BELLO Seamless Enhanced - Iniciando...")
    print("="*60 + "\n")
    # Listens on all interfaces, port 7860, without a public share link;
    # show_error=True surfaces exceptions in the UI.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )