VeuReu commited on
Commit
3d74263
·
verified ·
1 Parent(s): 690b6dc

Upload 8 files

Browse files
Files changed (8) hide show
  1. Dockerfile +21 -0
  2. README.md +10 -0
  3. api.py +167 -0
  4. config.example.yaml +9 -0
  5. models_job.py +31 -0
  6. queue_manager.py +65 -0
  7. requirements.txt +6 -0
  8. worker.py +133 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Paquetes básicos (ffmpeg si planeas procesar audio)
4
+ RUN apt-get update && apt-get install -y --no-install-recommends \
5
+ build-essential ffmpeg curl git && rm -rf /var/lib/apt/lists/*
6
+
7
+ WORKDIR /app
8
+
9
+ COPY requirements.txt .
10
+ RUN pip install -U pip && pip install -r requirements.txt
11
+
12
+ COPY . .
13
+
14
+ # Crea directorios de trabajo y datos
15
+ RUN mkdir -p /app/data/uploads /app/data/results
16
+
17
+ # Puerto dinámico de HF Spaces
18
+ ENV PORT=7860
19
+
20
+ # Arranque
21
+ CMD ["bash", "-lc", "uvicorn api:app --host 0.0.0.0 --port ${PORT:-7860}"]
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Veureu Engine
3
+ emoji: 📉
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
api.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # api.py — versión corregida (orden de definición)
2
+
3
+ import os
4
+ import uuid
5
+ from fastapi import FastAPI, UploadFile, File, Form, Depends, Header, HTTPException, APIRouter
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.responses import JSONResponse
8
+ from typing import Optional
9
+ from models_job import JobCreate, JobStatus, JobResult
10
+ from queue_manager import job_store, job_queue, start_worker, UPLOAD_DIR
11
+ from worker import process_job
12
+ from pydantic import BaseModel
13
+ import subprocess
14
+ import tempfile
15
+ import base64
16
+ import requests
17
+
18
+ API_SHARED_TOKEN = os.environ.get("API_SHARED_TOKEN")
19
+ UI_SPACE_URL = os.environ.get("UI_SPACE_URL") # ej: https://org-tu--ui--space.hf.space
20
+
21
+ # ---------- Matxa - Alvocat (router) ----------
22
+ router = APIRouter()
23
+
24
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
25
+ MATXA_TTS_URL = os.getenv("MATXA_TTS_URL", "").strip()
26
+ INFERENCE_URL = "https://api-inference.huggingface.co/models/projecte-aina/matxa-alvocat"
27
+
28
+ class TTSRequest(BaseModel):
29
+ text: str
30
+
31
+ @router.post("/tts/matxa")
32
+ def tts_matxa(req: TTSRequest):
33
+ text = (req.text or "").strip()
34
+ if not text:
35
+ raise HTTPException(status_code=400, detail="Empty text")
36
+
37
+ try:
38
+ if MATXA_TTS_URL:
39
+ headers = {}
40
+ if HF_TOKEN:
41
+ headers["Authorization"] = f"Bearer {HF_TOKEN}"
42
+ resp = requests.post(
43
+ MATXA_TTS_URL,
44
+ headers=headers,
45
+ json={"text": text},
46
+ timeout=60,
47
+ )
48
+ if resp.status_code != 200:
49
+ raise HTTPException(status_code=502, detail=f"Space TTS error: {resp.text}")
50
+
51
+ if resp.headers.get("content-type", "").startswith("audio/"):
52
+ audio_bytes = resp.content
53
+ b64 = base64.b64encode(audio_bytes).decode("utf-8")
54
+ return {"mp3_data_url": f"data:audio/mpeg;base64,{b64}"}
55
+ else:
56
+ data = resp.json()
57
+ if "audio" in data and isinstance(data["audio"], str) and data["audio"].startswith("data:audio"):
58
+ return {"mp3_data_url": data["audio"]}
59
+ elif "audio_b64" in data:
60
+ audio_bytes = base64.b64decode(data["audio_b64"])
61
+ b64 = base64.b64encode(audio_bytes).decode("utf-8")
62
+ return {"mp3_data_url": f"data:audio/mpeg;base64,{b64}"}
63
+ else:
64
+ audio_bytes = data.get("bytes")
65
+ if isinstance(audio_bytes, str):
66
+ audio_bytes = base64.b64decode(audio_bytes)
67
+ b64 = base64.b64encode(audio_bytes).decode("utf-8")
68
+ return {"mp3_data_url": f"data:audio/mpeg;base64,{b64}"}
69
+
70
+ else:
71
+ if not HF_TOKEN:
72
+ raise HTTPException(status_code=500, detail="HF_TOKEN not set")
73
+ headers = {
74
+ "Authorization": f"Bearer {HF_TOKEN}",
75
+ "Accept": "audio/mpeg",
76
+ }
77
+ resp = requests.post(
78
+ INFERENCE_URL,
79
+ headers=headers,
80
+ json={"inputs": text},
81
+ timeout=60,
82
+ )
83
+ if resp.status_code != 200:
84
+ raise HTTPException(status_code=502, detail=f"Inference API error: {resp.text}")
85
+
86
+ audio_bytes = resp.content
87
+ b64 = base64.b64encode(audio_bytes).decode("utf-8")
88
+ return {"mp3_data_url": f"data:audio/mpeg;base64,{b64}"}
89
+
90
+ except HTTPException:
91
+ raise
92
+ except Exception as e:
93
+ raise HTTPException(status_code=500, detail=str(e))
94
+
95
+ # ---------- FastAPI app principal ----------
96
+ app = FastAPI(title="Veureu AD – API Space")
97
+
98
+ # CORS (restringe a tu UI Space si pasas UI_SPACE_URL)
99
+ app.add_middleware(
100
+ CORSMiddleware,
101
+ allow_origins=[UI_SPACE_URL] if UI_SPACE_URL else ["*"],
102
+ allow_credentials=False,
103
+ allow_methods=["*"],
104
+ allow_headers=["*"],
105
+ )
106
+
107
+ # Lanza el worker al arrancar
108
+ start_worker(process_job)
109
+
110
+ # -------- Auth sencilla por token compartido --------
111
+ def check_auth(authorization: Optional[str] = Header(None)):
112
+ if not API_SHARED_TOKEN:
113
+ return True
114
+ if not authorization or not authorization.startswith("Bearer "):
115
+ raise HTTPException(401, "Missing token")
116
+ if authorization.split(" ", 1)[1] != API_SHARED_TOKEN:
117
+ raise HTTPException(403, "Invalid token")
118
+ return True
119
+
120
+ # -------- Rutas "jobs" --------
121
+ @app.get("/")
122
+ def read_root():
123
+ return {"message": "Hello World"}
124
+
125
+ @app.post("/jobs")
126
+ async def create_job(
127
+ mode: str = Form(default="both"),
128
+ video_file: Optional[UploadFile] = File(default=None),
129
+ video_url: Optional[str] = Form(default=None),
130
+ _auth=Depends(check_auth),
131
+ ):
132
+ if not video_file and not video_url:
133
+ raise HTTPException(400, "Debe enviarse un 'video_file' o un 'video_url'.")
134
+ job_id = str(uuid.uuid4())
135
+ local_path = None
136
+ if video_file:
137
+ os.makedirs(UPLOAD_DIR, exist_ok=True)
138
+ save_path = os.path.join(UPLOAD_DIR, f"{job_id}_{video_file.filename}")
139
+ with open(save_path, "wb") as f:
140
+ f.write(await video_file.read())
141
+ local_path = save_path
142
+ st = JobStatus(job_id=job_id, status="queued", progress=0, message="En cola")
143
+ job_store.set_status(job_id, st)
144
+ job_queue.put({"job_id": job_id, "mode": mode, "local_path": local_path, "video_url": video_url})
145
+ return {"job_id": job_id}
146
+
147
+ @app.get("/jobs/{job_id}/status", response_model=JobStatus)
148
+ def get_status(job_id: str, _auth=Depends(check_auth)):
149
+ st = job_store.get_status(job_id)
150
+ if not st:
151
+ raise HTTPException(404, "Job no encontrado")
152
+ return st
153
+
154
+ @app.get("/jobs/{job_id}/result", response_model=JobResult)
155
+ def get_result(job_id: str, _auth=Depends(check_auth)):
156
+ res = job_store.get_result(job_id)
157
+ if not res:
158
+ st = job_store.get_status(job_id)
159
+ if st and st.status != "completed":
160
+ raise HTTPException(409, "El job no ha terminado")
161
+ raise HTTPException(404, "Resultado no encontrado")
162
+ return res
163
+
164
+ # <<< AHORA SÍ >>>
165
+ app.include_router(router)
166
+
167
+
config.example.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ api:
2
+ cors_origin: "https://org-tu--ui--space.hf.space"
3
+ worker:
4
+ tgi_base_url: ""
5
+ inference_endpoint_url: ""
6
+ inference_model_id: ""
7
+ storage:
8
+ uploads_dir: "/app/data/uploads"
9
+ results_dir: "/app/data/results"
models_job.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # models_job.py
2
+ from pydantic import BaseModel, Field, HttpUrl
3
+ from typing import Optional, List, Dict, Any
4
+
5
+ class JobCreate(BaseModel):
6
+ mode: str = Field(default="both", description="book|une|both")
7
+ video_url: Optional[str] = Field(default=None, description="URL/Ruta del vídeo si no se sube archivo")
8
+
9
+ class CharacterItem(BaseModel):
10
+ name: str
11
+ screen_time_sec: float
12
+
13
+ class Metrics(BaseModel):
14
+ wer: Optional[float] = None
15
+ der: Optional[float] = None
16
+ ux: Optional[float] = None
17
+
18
+ class JobStatus(BaseModel):
19
+ job_id: str
20
+ status: str # queued|processing|completed|failed
21
+ progress: int = 0
22
+ message: Optional[str] = None
23
+
24
+ class JobResult(BaseModel):
25
+ job_id: str
26
+ source_filename: str
27
+ duration_sec: Optional[float] = None
28
+ characters: List[CharacterItem] = []
29
+ book: Optional[Dict[str, Any]] = None # {text, mp3_url}
30
+ une: Optional[Dict[str, Any]] = None # {srt, mp3_url}
31
+ metrics: Optional[Metrics] = None
queue_manager.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # queue_manager.py
2
+ import os
3
+ import threading
4
+ import queue
5
+ import time
6
+ from typing import Dict, Any
7
+ from models_job import JobStatus, JobResult
8
+
9
+ UPLOAD_DIR = os.environ.get("UPLOAD_DIR", "/app/data/uploads")
10
+ RESULTS_DIR = os.environ.get("RESULTS_DIR", "/app/data/results")
11
+
12
+ class JobStore:
13
+ """
14
+ Almacena estados y resultados en memoria.
15
+ Para producción: sustituir por Redis / DB persistente si lo necesitas.
16
+ """
17
+ def __init__(self):
18
+ self.status: Dict[str, JobStatus] = {}
19
+ self.result: Dict[str, JobResult] = {}
20
+ self.lock = threading.Lock()
21
+
22
+ def set_status(self, job_id: str, status: JobStatus):
23
+ with self.lock:
24
+ self.status[job_id] = status
25
+
26
+ def get_status(self, job_id: str) -> JobStatus | None:
27
+ with self.lock:
28
+ return self.status.get(job_id)
29
+
30
+ def set_result(self, job_id: str, result: JobResult):
31
+ with self.lock:
32
+ self.result[job_id] = result
33
+
34
+ def get_result(self, job_id: str) -> JobResult | None:
35
+ with self.lock:
36
+ return self.result.get(job_id)
37
+
38
+ job_store = JobStore()
39
+ job_queue: "queue.Queue[Dict[str, Any]]" = queue.Queue()
40
+
41
+ def worker_loop(process_fn):
42
+ while True:
43
+ job = job_queue.get()
44
+ if job is None:
45
+ break
46
+ try:
47
+ process_fn(job)
48
+ except Exception as e:
49
+ # Marca como failed
50
+ st = job_store.get_status(job["job_id"])
51
+ if st:
52
+ st.status = "failed"
53
+ st.message = f"Error: {e}"
54
+ st.progress = 0
55
+ job_store.set_status(job["job_id"], st)
56
+ finally:
57
+ job_queue.task_done()
58
+
59
+ _worker_thread = None
60
+
61
+ def start_worker(process_fn):
62
+ global _worker_thread
63
+ if _worker_thread is None or not _worker_thread.is_alive():
64
+ _worker_thread = threading.Thread(target=worker_loop, args=(process_fn,), daemon=True)
65
+ _worker_thread.start()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi==0.115.0
2
+ uvicorn[standard]==0.30.6
3
+ pydantic==2.9.2
4
+ python-multipart==0.0.9
5
+ requests==2.32.3
6
+ huggingface_hub==0.25.2
worker.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # worker.py
2
+ import os
3
+ import time
4
+ import uuid
5
+ import requests
6
+ from typing import Dict, Any, Optional
7
+ from queue_manager import job_store, UPLOAD_DIR, RESULTS_DIR
8
+ from models_job import JobStatus, JobResult, CharacterItem, Metrics
9
+
10
+ HF_TOKEN = os.environ.get("HF_TOKEN") # opcional
11
+ TGI_BASE_URL = os.environ.get("TGI_BASE_URL") # ej: https://org-tgi--space.hf.space
12
+ INFERENCE_ENDPOINT_URL = os.environ.get("INFERENCE_ENDPOINT_URL")
13
+ INFERENCE_MODEL_ID = os.environ.get("INFERENCE_MODEL_ID") # p.ej. "meta-llama/Llama-3.1-8B-Instruct"
14
+
15
+ def _auth_headers_json() -> Dict[str, str]:
16
+ headers = {"Content-Type": "application/json"}
17
+ if HF_TOKEN:
18
+ headers["Authorization"] = f"Bearer {HF_TOKEN}"
19
+ return headers
20
+
21
+ def _call_tgi(prompt: str) -> str:
22
+ """
23
+ Ejemplo para TGI /v1/chat/completions (ajusta al formato de tu TGI).
24
+ """
25
+ if not TGI_BASE_URL:
26
+ # si no hay TGI configurado, devuelve texto de demo
27
+ return f"[DEMO] Respuesta generada para: {prompt[:60]}..."
28
+ url = f"{TGI_BASE_URL.rstrip('/')}/v1/chat/completions"
29
+ payload = {
30
+ "model": "tgi", # no siempre necesario
31
+ "messages": [{"role": "user", "content": prompt}],
32
+ "max_tokens": 256
33
+ }
34
+ r = requests.post(url, headers=_auth_headers_json(), json=payload, timeout=120)
35
+ r.raise_for_status()
36
+ data = r.json()
37
+ # Ajusta según la respuesta de tu TGI
38
+ return data["choices"][0]["message"]["content"]
39
+
40
+ def _call_inference_api(prompt: str) -> str:
41
+ """
42
+ Ejemplo para Inference API serverless.
43
+ """
44
+ if not INFERENCE_MODEL_ID:
45
+ return f"[DEMO] Inference API no configurado; prompt: {prompt[:60]}..."
46
+ url = f"https://api-inference.huggingface.co/models/{INFERENCE_MODEL_ID}"
47
+ r = requests.post(url, headers=_auth_headers_json(), json={"inputs": prompt, "parameters": {"max_new_tokens": 128}}, timeout=120)
48
+ r.raise_for_status()
49
+ out = r.json()
50
+ if isinstance(out, list) and out and "generated_text" in out[0]:
51
+ return out[0]["generated_text"]
52
+ return str(out)
53
+
54
+ def _call_inference_endpoint(payload: Dict[str, Any]) -> Dict[str, Any]:
55
+ """
56
+ Ejemplo para Inference Endpoint dedicado.
57
+ """
58
+ if not INFERENCE_ENDPOINT_URL:
59
+ return {"text": "[DEMO] Endpoint no configurado"}
60
+ r = requests.post(INFERENCE_ENDPOINT_URL, headers=_auth_headers_json(), json=payload, timeout=120)
61
+ r.raise_for_status()
62
+ return r.json()
63
+
64
+ def _fake_extract_characters() -> list[CharacterItem]:
65
+ return [
66
+ CharacterItem(name="Alice", screen_time_sec=312.5),
67
+ CharacterItem(name="Bob", screen_time_sec=288.0),
68
+ ]
69
+
70
+ def process_job(job: Dict[str, Any]):
71
+ """
72
+ job = {
73
+ "job_id": str,
74
+ "mode": "book"|"une"|"both",
75
+ "local_path": "/app/data/uploads/xxx.mp4" (si es subida),
76
+ "video_url": "https://..." (si es por URL)
77
+ }
78
+ """
79
+ job_id = job["job_id"]
80
+ mode = job.get("mode", "both")
81
+ src_filename = os.path.basename(job.get("local_path") or job.get("video_url") or f"{job_id}.mp4")
82
+
83
+ # Marca a processing
84
+ st = JobStatus(job_id=job_id, status="processing", progress=5, message="Iniciando procesamiento…")
85
+ job_store.set_status(job_id, st)
86
+
87
+ # (1) Descarga si viene por URL (demo omite; implementa si lo necesitas)
88
+ local_path = job.get("local_path")
89
+ if not local_path and job.get("video_url"):
90
+ # Aquí descargarías el vídeo a local_path
91
+ # local_path = os.path.join(UPLOAD_DIR, f"{job_id}_{src_filename}")
92
+ # requests.get(... stream ...) -> write file
93
+ pass
94
+
95
+ # (2) ASR / Diarización / Preparaciones etc. (simulación)
96
+ time.sleep(1)
97
+ st.progress = 20; st.message = "Extrayendo transcripción/diálogos…"; job_store.set_status(job_id, st)
98
+ # Aquí llamarías a tus pipelines reales (Whisper, diarización, etc.)
99
+
100
+ # (3) Generación “libro” con LLM (demo)
101
+ book_text = None; book_mp3_url = None
102
+ if mode in ("book","both"):
103
+ prompt = "Genera una audiodescripción tipo libro con diálogos condensados del vídeo."
104
+ book_text = _call_tgi(prompt) if TGI_BASE_URL else _call_inference_api(prompt)
105
+ # Si sintetizas audio, guarda mp3 y pon su URL accesible (por simplicidad omitimos)
106
+ book_mp3_url = None
107
+ st.progress = 60; st.message = "Generando texto Libro…"; job_store.set_status(job_id, st)
108
+
109
+ # (4) Generación UNE (SRT + audio) (demo)
110
+ une_srt = None; une_mp3_url = None
111
+ if mode in ("une","both"):
112
+ # Genera un SRT mínimo de ejemplo
113
+ une_srt = "1\n00:00:00,000 --> 00:00:03,000\n[Audiodescripción UNE de ejemplo]\n"
114
+ une_mp3_url = None
115
+ st.progress = 80; st.message = "Generando SRT UNE…"; job_store.set_status(job_id, st)
116
+
117
+ # (5) Personajes, métricas (demo)
118
+ chars = _fake_extract_characters()
119
+ metrics = Metrics(wer=0.07, der=0.12, ux=4.3)
120
+
121
+ time.sleep(1)
122
+ st.progress = 100; st.message = "Completado"; st.status = "completed"; job_store.set_status(job_id, st)
123
+
124
+ result = JobResult(
125
+ job_id=job_id,
126
+ source_filename=src_filename,
127
+ duration_sec=None,
128
+ characters=chars,
129
+ book={"text": book_text, "mp3_url": book_mp3_url} if book_text or book_mp3_url else None,
130
+ une={"srt": une_srt, "mp3_url": une_mp3_url} if une_srt or une_mp3_url else None,
131
+ metrics=metrics
132
+ )
133
+ job_store.set_result(job_id, result)