lyric-sync-api

Sleeping

App Files Files Community

Joyboy-dy committed on Feb 2

Commit

9d59b4c

verified ·

1 Parent(s): c088e56

Upload 3 files

Browse files

Files changed (3) hide show

Dockerfile +25 -0
requirements.txt +4 -0
server.py +135 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+FROM python:3.11
+WORKDIR /app
+# Install system dependencies (ffmpeg is required for audio processing).
+# Recommended extras are skipped and the apt package lists are removed in the
+# same layer so they do not end up in the final image.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first so the dependency layer is reused when only code changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY . .
+# Create a user to avoid running as root (security best practice required by HF)
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Expose port 7860 (Hugging Face default port)
+EXPOSE 7860
+# Start command
+CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+fastapi
+uvicorn[standard]
+python-multipart
+faster-whisper

server.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import os
+import tempfile
+import shutil
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from faster_whisper import WhisperModel
+# Model configuration - tiny is fastest, small is more accurate.
+# This value is passed to WhisperModel(...) by get_model().
+MODEL_SIZE = "tiny"
+# Module-level model instance; stays None until get_model() loads it.
+model = None
+def get_model():
+    global model
+    if model is None:
+        print(f"Loading Whisper model: {MODEL_SIZE}...")
+        model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
+        print("Model loaded!")
+    return model
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Lifespan handler handed to FastAPI(lifespan=...): the statements before
+    # `yield` run when the server starts, the ones after it when it stops.
+    # Preload the model at startup so get_model() returns immediately later on.
+    print("Server starting up - preloading model...")
+    get_model()
+    print("Model ready!")
+    yield
+    print("Server shutting down...")
+app = FastAPI(title="LyricSync Backend", lifespan=lifespan)
+# Configure CORS
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@app.get("/")
+@app.head("/")
+async def root():
+    return {"service": "LyricSync Backend", "engine": "faster-whisper", "status": "operational"}
+@app.get("/health")
+@app.head("/health")
+async def health():
+    return {"status": "healthy"}
+def format_srt_time(seconds: float) -> str:
+    hours = int(seconds // 3600)
+    minutes = int((seconds % 3600) // 60)
+    secs = int(seconds % 60)
+    millis = int((seconds % 1) * 1000)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
+def generate_srt(segments) -> str:
+    srt_lines = []
+    for i, seg in enumerate(segments, 1):
+        start = format_srt_time(seg["start"])
+        end = format_srt_time(seg["end"])
+        text = seg["text"].strip()
+        srt_lines.append(f"{i}\n{start} --> {end}\n{text}\n")
+    return "\n".join(srt_lines)
+@app.post("/align")
+async def align_audio(
+    audio_file: UploadFile = File(...),
+    lyrics: str = Form("")
+):
+    temp_dir = tempfile.mkdtemp()
+    try:
+        # Save audio
+        audio_path = os.path.join(temp_dir, audio_file.filename or "audio.mp3")
+        with open(audio_path, "wb") as f:
+            shutil.copyfileobj(audio_file.file, f)
+        print(f"Processing: {audio_file.filename}")
+        # Transcribe with word timestamps
+        whisper = get_model()
+        segments_gen, info = whisper.transcribe(
+            audio_path,
+            word_timestamps=True,
+            language="en"
+        )
+        # Process segments
+        segments = []
+        word_segments = []
+        for segment in segments_gen:
+            seg_data = {
+                "start": segment.start,
+                "end": segment.end,
+                "text": segment.text,
+                "words": []
+            }
+            if segment.words:
+                for word in segment.words:
+                    word_data = {
+                        "word": word.word,
+                        "start": word.start,
+                        "end": word.end,
+                        "score": getattr(word, 'probability', 0.9)
+                    }
+                    seg_data["words"].append(word_data)
+                    word_segments.append(word_data)
+            segments.append(seg_data)
+        print(f"Processed {len(segments)} segments")
+        return {
+            "srt_content": generate_srt(segments),
+            "word_segments": word_segments,
+            "full_json": {"segments": segments}
+        }
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        shutil.rmtree(temp_dir, ignore_errors=True)
+if __name__ == "__main__":
+    import uvicorn
+    port = int(os.environ.get("PORT", 10000))
+    print(f"Starting LyricSync backend on port {port}...")
+    uvicorn.run(app, host="0.0.0.0", port=port)