Joyboy-dy committed on
Commit
9d59b4c
·
verified ·
1 Parent(s): c088e56

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +25 -0
  2. requirements.txt +4 -0
  3. server.py +135 -0
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11

# Install system dependencies (ffmpeg is required for audio processing).
# --no-install-recommends and removing the apt lists afterwards keep this
# layer as small as possible.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Create a user to avoid running as root (security best practice required by HF).
# Created before the COPY steps so the application files can be owned by it.
RUN useradd -m -u 1000 user

WORKDIR /app

# Copy requirements and install (separate layer so code edits don't bust the pip cache)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code, owned by the runtime user rather than root
COPY --chown=user . .

USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Expose port 7860 (Hugging Face default port)
EXPOSE 7860

# Start command
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"]
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
# server.py passes `lifespan=` to FastAPI(), which requires FastAPI >= 0.93.0.
fastapi>=0.93.0
uvicorn[standard]
# Needed by FastAPI to parse multipart/form-data uploads (File/Form parameters).
python-multipart
faster-whisper
server.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import shutil
4
+ from contextlib import asynccontextmanager
5
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from faster_whisper import WhisperModel
8
+
9
# Model configuration - tiny is fastest, small is more accurate.
# Can be overridden with the WHISPER_MODEL_SIZE environment variable;
# an unset or empty variable falls back to "tiny".
MODEL_SIZE = os.environ.get("WHISPER_MODEL_SIZE") or "tiny"
# Process-wide model instance; stays None until the model is first loaded.
model = None
12
+
13
def get_model():
    """Return the shared Whisper model, creating it on the first call.

    The instance is cached in the module-level ``model`` global, so the
    model is constructed at most once per process; later calls return the
    cached object.
    """
    global model
    # Fast path: already loaded, nothing to do.
    if model is not None:
        return model

    print(f"Loading Whisper model: {MODEL_SIZE}...")
    model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")
    print("Model loaded!")
    return model
20
+
21
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: load the model before serving requests.

    Everything before ``yield`` runs at startup; everything after it runs
    at shutdown.
    """
    # Startup: pay the model-loading cost here instead of on the first request.
    print("Server starting up - preloading model...")
    get_model()
    print("Model ready!")

    yield  # the application handles requests while suspended here

    # Shutdown: nothing to release, just log.
    print("Server shutting down...")
29
+
30
app = FastAPI(title="LyricSync Backend", lifespan=lifespan)

# Configure CORS: the API is open to browser clients on any origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    # Credentials must stay disabled while origins is the "*" wildcard:
    # enabling both would let any website make credentialed cross-origin
    # requests, and the FastAPI documentation disallows the combination.
    # To support cookies/authorization headers, list explicit origins
    # above and then set this to True.
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
40
+
41
@app.get("/")
@app.head("/")
async def root():
    """Return a static service descriptor for GET and HEAD requests on "/"."""
    service_info = {
        "service": "LyricSync Backend",
        "engine": "faster-whisper",
        "status": "operational",
    }
    return service_info
45
+
46
@app.get("/health")
@app.head("/health")
async def health():
    """Health-check endpoint: always reports a static "healthy" status."""
    payload = {"status": "healthy"}
    return payload
50
+
51
def format_srt_time(seconds: float) -> str:
    """Format a time offset in seconds as an SRT timestamp.

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero because SRT has no negative times.

    Returns:
        The timestamp as ``HH:MM:SS,mmm``.
    """
    # Convert to whole milliseconds once, rounding to nearest. Truncating
    # the fractional part separately loses a millisecond to float error
    # (1.001 would render as ",000"), and rounding up front lets a carry
    # such as 59.9996 s roll over cleanly into the next minute.
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
57
+
58
def generate_srt(segments) -> str:
    """Render transcription segments as the text of an SRT subtitle file.

    Args:
        segments: Iterable of mappings with ``"start"`` and ``"end"``
            (seconds) and ``"text"`` keys.

    Returns:
        The SRT document: numbered cues separated by a blank line. Returns
        an empty string when there are no cues to emit.
    """
    srt_lines = []
    for seg in segments:
        text = seg["text"].strip()
        # A cue with no text is malformed SRT, so drop it and keep the
        # numbering contiguous over the cues that are emitted.
        if not text:
            continue
        start = format_srt_time(seg["start"])
        end = format_srt_time(seg["end"])
        index = len(srt_lines) + 1
        srt_lines.append(f"{index}\n{start} --> {end}\n{text}\n")
    return "\n".join(srt_lines)
66
+
67
@app.post("/align")
async def align_audio(
    audio_file: UploadFile = File(...),
    lyrics: str = Form("")
):
    """Transcribe an uploaded audio file and return timed segments and words.

    Args:
        audio_file: The uploaded audio. It is written to a temporary
            directory for the duration of the request.
        lyrics: Accepted as a form field but not currently used by the
            transcription below.

    Returns:
        A dict with ``"srt_content"`` (SRT text built from the segments),
        ``"word_segments"`` (flat list of word timings) and ``"full_json"``
        (``{"segments": [...]}`` with per-segment word lists).

    Raises:
        HTTPException: Status 500 with the error message as detail if
            saving or transcribing the audio fails.
    """
    temp_dir = tempfile.mkdtemp()

    try:
        # Save audio. The client-supplied filename is untrusted: "../" parts
        # or an absolute path would make os.path.join escape temp_dir, so
        # only its extension is kept and the file gets a fixed name.
        client_name = os.path.basename(audio_file.filename or "")
        suffix = os.path.splitext(client_name)[1] or ".mp3"
        audio_path = os.path.join(temp_dir, f"audio{suffix}")
        with open(audio_path, "wb") as f:
            shutil.copyfileobj(audio_file.file, f)

        print(f"Processing: {audio_file.filename}")

        # Transcribe with word timestamps
        # NOTE(review): the transcription runs synchronously inside this
        # async handler, so other requests wait until it completes.
        whisper = get_model()
        segments_gen, info = whisper.transcribe(
            audio_path,
            word_timestamps=True,
            language="en"
        )

        # Process segments
        segments = []
        word_segments = []

        for segment in segments_gen:
            words = [
                {
                    "word": word.word,
                    "start": word.start,
                    "end": word.end,
                    # Fall back to 0.9 when the word has no probability.
                    "score": getattr(word, 'probability', 0.9)
                }
                for word in (segment.words or [])
            ]
            # The same word dicts appear both in the flat list and under
            # their parent segment.
            word_segments.extend(words)
            segments.append({
                "start": segment.start,
                "end": segment.end,
                "text": segment.text,
                "words": words
            })

        print(f"Processed {len(segments)} segments")

        return {
            "srt_content": generate_srt(segments),
            "word_segments": word_segments,
            "full_json": {"segments": segments}
        }

    except Exception as e:
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e)) from e

    finally:
        # Always remove the uploaded file, on success and on failure.
        shutil.rmtree(temp_dir, ignore_errors=True)
130
+
131
if __name__ == "__main__":
    # Direct-execution entry point: serve the app with uvicorn on the port
    # given by the PORT environment variable (10000 when it is not set).
    import uvicorn

    port = int(os.getenv("PORT", 10000))
    print(f"Starting LyricSync backend on port {port}...")
    uvicorn.run(app, host="0.0.0.0", port=port)