import gradio as gr
import numpy as np
import librosa
import requests
from io import BytesIO
from PIL import Image
import os
import secrets
import tempfile
import base64
import math
import struct
import cv2
import shutil
from tensorflow.keras.models import load_model
from faster_whisper import WhisperModel
from textblob import TextBlob
import torch
import scipy.io.wavfile
from transformers import AutoProcessor, MusicgenForConditionalGeneration
from pydub import AudioSegment
from fastapi import FastAPI, Request
from fastapi.responses import RedirectResponse, HTMLResponse
from gradio.routes import mount_gradio_app
# Try to import PyGithub. When it is not installed, print a friendly notice
# instead of failing, and record the outcome in GITHUB_AVAILABLE so the
# GitHub publishing feature can be switched off.
try:
    from github import Github, GithubException
except ImportError:
    GITHUB_AVAILABLE = False
    print("⚠️ PyGithub no instalado. La publicación en GitHub no estará disponible.")
else:
    GITHUB_AVAILABLE = True
# ============================================================
# 1. Carga de modelos de IA
# ============================================================
def load_emotion_model(model_path):
    """Load the emotion-recognition model stored at *model_path*.

    Returns:
        The object returned by ``load_model``, or ``None`` when loading
        fails for any reason (the error is printed, not raised).
    """
    try:
        loaded = load_model(model_path)
    except Exception as err:
        print("Error loading emotion model:", err)
        return None
    else:
        print("Emotion model loaded successfully")
        return loaded
# File name of the emotion model handed to load_emotion_model().
model_path = "mymodel_SER_LSTM_RAVDESS.h5"
# Loaded once at import time; `model` is None when loading failed.
model = load_emotion_model(model_path)
# Whisper speech-to-text model ("small" size), configured for CPU with int8 compute.
model_size = "small"
whisper_model = WhisperModel(model_size, device="cpu", compute_type="int8")
# MusicGen
def load_musicgen_model():
    """Load the MusicGen processor and model and choose the device to run on.

    Returns:
        A ``(processor, model, device)`` tuple, where ``device`` is
        ``"cuda"`` when CUDA is available and ``"cpu"`` otherwise. If any
        step fails the error is printed and ``(None, None, None)`` is
        returned instead.
    """
    try:
        target = "cpu"
        if torch.cuda.is_available():
            target = "cuda"
        proc = AutoProcessor.from_pretrained("facebook/musicgen-small")
        net = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
        net.to(target)
        print("MusicGen model loaded successfully")
        return proc, net, target
    except Exception as err:
        print("Error loading MusicGen:", err)
        return None, None, None
# Load MusicGen once at import time; all three names are None if loading failed.
processor, music_model, device = load_musicgen_model()
# DeepAI API key (optional, used for images), read from the "DeepAI_api_key"
# environment variable; None when the variable is not set.
DEEPAI_API_KEY = os.getenv("DeepAI_api_key")
# ============================================================
# 2. Utilidades de audio y emociones
# ============================================================
def chunk_audio(audio_path, chunk_duration=10):
    """Split an audio file into consecutive WAV chunks.

    Args:
        audio_path: Path of the audio file to split.
        chunk_duration: Length of each chunk in seconds; must be positive.

    Returns:
        A ``(chunk_files, num_chunks)`` tuple. ``chunk_files`` holds the
        paths of temporary WAV files created with ``delete=False``, so the
        caller is responsible for removing them. When the audio is shorter
        than one chunk, or when anything goes wrong (invalid duration,
        unreadable file, export failure), the error is printed and
        ``([audio_path], 1)`` is returned so the original file can be
        processed as a single chunk.
    """
    chunk_files = []
    try:
        # Validate first, before the value is used in any arithmetic.
        if chunk_duration <= 0:
            raise ValueError("Chunk duration must be positive")
        audio = AudioSegment.from_file(audio_path)
        duration_ms = len(audio)
        chunk_ms = chunk_duration * 1000
        if chunk_duration > duration_ms / 1000:
            return [audio_path], 1
        num_chunks = math.ceil(duration_ms / chunk_ms)
        for i in range(num_chunks):
            start_ms = i * chunk_ms
            end_ms = min((i + 1) * chunk_ms, duration_ms)
            # Only reserve a unique path here; the handle is closed before
            # exporting so the exporter can reopen the file on any platform.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                chunk_path = tmp.name
            # Track the path before exporting so a failed export is cleaned up.
            chunk_files.append(chunk_path)
            audio[start_ms:end_ms].export(chunk_path, format="wav")
        return chunk_files, num_chunks
    except Exception as e:
        print("Error chunking audio:", e)
        # The fallback result does not reference the partial chunks, so
        # remove them instead of leaving orphaned temp files behind.
        for path in chunk_files:
            try:
                os.remove(path)
            except OSError:
                pass  # best-effort cleanup
        return [audio_path], 1
def transcribe(wav_filepath):
    """Transcribe the audio file at *wav_filepath* with the Whisper model.

    Returns:
        The text of all returned segments concatenated without a separator,
        or the literal string ``"Transcription failed"`` when any error
        occurs (the error is printed).
    """
    try:
        segments, _info = whisper_model.transcribe(wav_filepath, beam_size=5)
        return "".join(segment.text for segment in segments)
    except Exception as err:
        print("Error transcribing:", err)
        return "Transcription failed"
def extract_mfcc(wav_file_name):
    """Compute an averaged 40-coefficient MFCC feature vector for an audio file.

    The file is loaded with ``librosa.load``, 40 MFCCs are computed, and the
    transposed MFCC matrix is averaged along its first axis.

    Returns:
        The averaged MFCC array, or ``None`` when loading or feature
        extraction fails (the error is printed).
    """
    try:
        signal, sample_rate = librosa.load(wav_file_name)
        coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
        return np.mean(coefficients.T, axis=0)
    except Exception as err:
        print("Error extracting MFCC:", err)
        return None
# Maps the class index predicted by the emotion model to its label
# (looked up in predict_emotion_from_audio).
emotions = {
0: "neutral", 1: "calm", 2: "happy", 3: "sad",
4: "angry", 5: "fearful", 6: "disgust", 7: "surprised",
}
def predict_emotion_from_audio(wav_filepath):
    """Predict the emotion label for the audio file at *wav_filepath*.

    Returns:
        One of the labels in ``emotions`` (or ``"unknown"`` if the predicted
        index is not in the table). Instead of raising, failures are
        reported as strings: ``"Model not loaded"`` when the model is
        unavailable, ``"Feature extraction error"`` when MFCC extraction
        returned nothing, and ``"Prediction error"`` for any other
        exception (which is printed).
    """
    try:
        if model is None:
            return "Model not loaded"
        features = extract_mfcc(wav_filepath)
        if features is None:
            return "Feature extraction error"
        # The model is fed a single sample shaped (1, 40, 1).
        batch = np.reshape(features, (1, 40, 1))
        scores = model.predict(batch, verbose=0)
        best_index = np.argmax(scores[0])
        return emotions.get(best_index, "unknown")
    except Exception as err:
        print("Emotion prediction error:", err)
        return "Prediction error"
def analyze_sentiment(text):
    """Classify the sentiment of *text* using TextBlob polarity.

    Returns:
        A ``(label, polarity)`` tuple. ``label`` is ``"positive"`` for
        polarity above 0.1, ``"negative"`` for polarity below -0.1 and
        ``"neutral"`` otherwise. Empty or whitespace-only input yields
        ``("neutral", 0.0)`` without running the analysis.
    """
    if not (text and text.strip()):
        return "neutral", 0.0
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0.1:
        label = "positive"
    elif polarity < -0.1:
        label = "negative"
    else:
        label = "neutral"
    return label, polarity
# ============================================================
# 3. Prompts para imagen y música
# ============================================================
def get_image_prompt(sentiment, text, chunk_idx, total_chunks):
    """Build the text-to-image prompt for one audio chunk.

    The prompt describes a 360° panoramic sketch of the first 200 characters
    of *text*, followed by a colour/atmosphere hint chosen from *sentiment*
    ("positive", "negative", anything else is treated as neutral).
    ``chunk_idx`` and ``total_chunks`` are accepted but not used.
    """
    base = f"Generate an equirectangular 360° panoramic graphite sketch drawing, detailed pencil texture with faint neon glows, cinematic lighting of: {text[:200]}."
    if sentiment == "positive":
        mood_hint = " Use bright, high contrast, rich colors, joyful atmosphere."
    elif sentiment == "negative":
        mood_hint = " Use dark, low contrast, somber tones, melancholic atmosphere."
    else:
        mood_hint = " Use balanced, neutral colors, calm atmosphere."
    return base + mood_hint
def get_music_prompt(emotion, text, chunk_idx, total_chunks):
    """Build the music-generation prompt for one audio chunk.

    The emotion is matched case-insensitively against the eight known
    labels; each has its own musical description, followed by the first
    100 characters of *text*. Unknown emotions fall back to a generic
    prompt that quotes *emotion* as given. ``chunk_idx`` and
    ``total_chunks`` are accepted but not used.
    """
    excerpt = text[:100]
    mood = emotion.lower()
    descriptions = {
        "neutral": "Neutral ambient orchestral music, steady tempo, no strong emotions",
        "calm": "Calm, peaceful orchestral music, slow strings, soft dynamics",
        "happy": "Happy, uplifting orchestral music, major key, lively rhythm",
        "sad": "Sad, melancholic orchestral music, minor key, slow tempo",
        "angry": "Angry, aggressive orchestral music, dissonant, strong percussion",
        "fearful": "Fearful, tense orchestral music, unstable harmonies, suspenseful",
        "disgust": "Disgusted, harsh orchestral music, irregular rhythm, rough textures",
        "surprised": "Surprised, sudden changes, playful orchestral music",
    }
    if mood in descriptions:
        return f"{descriptions[mood]}, inspired by: {excerpt}"
    return f"Background music with {emotion} mood for: {excerpt}"
# ============================================================
# 4. Generación de imagen (DeepAI) y música (MusicGen)
# ============================================================
def upscale_image(image, target_width=4096, target_height=2048):
    """Upscale *image* and resize it to ``target_width`` x ``target_height``.

    When ``DEEPAI_API_KEY`` is set, the image is saved as a temporary JPEG
    and posted to DeepAI's ``torch-srgan`` endpoint; the image behind the
    returned ``output_url`` is downloaded and resized to the target size.
    Without a key, when the response has no ``output_url``, or when any
    step fails (the error is printed), the input image is resized locally
    instead.

    Args:
        image: PIL image to upscale.
        target_width: Width of the returned image in pixels.
        target_height: Height of the returned image in pixels.

    Returns:
        A PIL image resized to the target size with LANCZOS resampling.
    """
    target_size = (target_width, target_height)
    # Requests has no default timeout; without one a stalled connection
    # would block this call forever.
    request_timeout = 120
    tmp_path = None
    try:
        if not DEEPAI_API_KEY:
            return image.resize(target_size, Image.Resampling.LANCZOS)
        # Reserve a unique path, then close the handle before writing to it.
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp_path = tmp.name
        image.save(tmp_path, "JPEG", quality=95)
        # Open the upload in a context manager so the handle is always closed.
        with open(tmp_path, "rb") as upload:
            response = requests.post(
                "https://api.deepai.org/api/torch-srgan",
                files={"image": upload},
                headers={"api-key": DEEPAI_API_KEY},
                timeout=request_timeout,
            )
        data = response.json()
        if "output_url" in data:
            img_resp = requests.get(data["output_url"], timeout=request_timeout)
            up_img = Image.open(BytesIO(img_resp.content))
            return up_img.resize(target_size, Image.Resampling.LANCZOS)
        # Fallback: the API answered without an upscaled image.
        return image.resize(target_size, Image.Resampling.LANCZOS)
    except Exception as e:
        print("Upscale error:", e)
        return image.resize(target_size, Image.Resampling.LANCZOS)
    finally:
        # The temp JPEG is only needed for the upload; never leave it behind.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass  # best-effort cleanup
def generate_image(sentiment, text, chunk_idx, total_chunks):
    """Generate the panoramic image for one audio chunk.

    The prompt comes from ``get_image_prompt``. When ``DEEPAI_API_KEY`` is
    set, a 1024x512 image is requested from DeepAI's ``text2img`` endpoint;
    if no key is set or the response has no ``output_url``, a white
    1024x512 placeholder is used. The result is passed through
    ``upscale_image``.

    Args:
        sentiment: Sentiment label used to pick the prompt's mood.
        text: Text the image should illustrate.
        chunk_idx: Index of the chunk (forwarded to the prompt builder).
        total_chunks: Total number of chunks (forwarded to the prompt builder).

    Returns:
        The image returned by ``upscale_image``, or a white 4096x2048 image
        when any step raises (the error is printed).
    """
    # Requests has no default timeout; without one a stalled connection
    # would block this call forever.
    request_timeout = 120
    try:
        prompt = get_image_prompt(sentiment, text, chunk_idx, total_chunks)
        img = None
        if DEEPAI_API_KEY:
            response = requests.post(
                "https://api.deepai.org/api/text2img",
                data={"text": prompt, "width": 1024, "height": 512, "image_generator_version": "hd"},
                headers={"api-key": DEEPAI_API_KEY},
                timeout=request_timeout,
            )
            data = response.json()
            if "output_url" in data:
                img_resp = requests.get(data["output_url"], timeout=request_timeout)
                img = Image.open(BytesIO(img_resp.content))
        if img is None:
            # No API key, or the API returned no image: use a blank placeholder.
            img = Image.new("RGB", (1024, 512), color="white")
        return upscale_image(img)
    except Exception as e:
        print("Image generation error:", e)
        return Image.new("RGB", (4096, 2048), color="white")
def generate_music(text, emotion, chunk_idx, total_chunks):
    """Generate a short music clip for one audio chunk with MusicGen.

    The prompt comes from ``get_music_prompt`` and is cut to 200 characters
    plus ``"..."`` when longer. Up to 512 new tokens are generated, the
    first waveform is scaled by its peak absolute value, and the result is
    written to a temporary WAV file created with ``delete=False``.

    Returns:
        The path of the WAV file, or ``None`` when the MusicGen processor or
        model is unavailable or any step fails (the error is printed).
    """
    try:
        if processor is None or music_model is None:
            return None
        prompt = get_music_prompt(emotion, text, chunk_idx, total_chunks)
        if len(prompt) > 200:
            prompt = prompt[:200] + "..."
        model_inputs = processor(text=[prompt], padding=True, return_tensors="pt").to(device)
        generated = music_model.generate(**model_inputs, max_new_tokens=512)
        sample_rate = music_model.config.audio_encoder.sampling_rate
        waveform = generated[0, 0].cpu().numpy()
        # Peak-normalise; the 1e-9 floor avoids dividing by zero on silence.
        peak = np.max(np.abs(waveform))
        waveform = waveform / max(1e-9, peak)
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            out_path = tmp.name
            scipy.io.wavfile.write(out_path, rate=sample_rate, data=waveform)
        return out_path
    except Exception as err:
        print("Music generation error:", err)
        return None
# ============================================================
# 5. Metadatos 360° (XMP)
# ============================================================
def create_xmp_block(width, height):
return f'''
Esta ventana se cerrará automáticamente. Puedes volver a la aplicación.
''') @fastapi_app.get("/github_callback") async def github_callback(request: Request): code = request.query_params.get("code") state = request.query_params.get("state") if not code or not state: return RedirectResponse(url="/") if not GITHUB_AVAILABLE: return HTMLResponse("PyGithub no instalado.", status_code=500) client_id = os.getenv("GITHUB_CLIENT_ID") client_secret = os.getenv("GITHUB_CLIENT_SECRET") if not client_id or not client_secret: return HTMLResponse("Error: GitHub OAuth no configurado correctamente.", status_code=500) token_url = "https://github.com/login/oauth/access_token" payload = { "client_id": client_id, "client_secret": client_secret, "code": code, "state": state, } headers = {"Accept": "application/json"} resp = requests.post(token_url, data=payload, headers=headers) if resp.status_code != 200: return HTMLResponse(f"Error al obtener token: {resp.text}", status_code=500) token_data = resp.json() access_token = token_data.get("access_token") if not access_token: return HTMLResponse("No se recibió access_token", status_code=500) html_path = _pending_gists.pop(state, None) if not html_path or not os.path.exists(html_path): return HTMLResponse("El archivo HTML ya no está disponible. Vuelve a generar el entorno.", status_code=404) try: g = Github(access_token) user = g.get_user() with open(html_path, "r", encoding="utf-8") as f: content = f.read() gist = user.create_gist( public=True, description="Entorno Virtual Afectivo - EVA 360", files={f"eva_360_{secrets.token_hex(4)}.html": {"content": content}} ) gist_url = gist.html_url return HTMLResponse(f'''Tu EVA 360 está disponible en: {gist_url}
Puedes cerrar esta ventana y volver a la aplicación.
''') except Exception as e: return HTMLResponse(f"Error al crear Gist: {str(e)}", status_code=500) # ============================================================ # 10. Interfaz Gradio # ============================================================ output_containers = [] group_components = [] def process_and_display(audio_input, generate_audio, chunk_duration): if chunk_duration is None or chunk_duration <= 0: chunk_duration = 10 yield ( [gr.HTML(f'''Procesando audio en segmentos de {chunk_duration} segundos...