Spaces:

factorstudios
/

trans

Sleeping

App Files Files Community

factorstudios committed on Jun 3

Commit

e30b533

verified ·

1 Parent(s): a31ce2e

Create transcription_server.py

Browse files

Files changed (1) hide show

transcription_server.py +492 -0

transcription_server.py ADDED Viewed

	@@ -0,0 +1,492 @@

+#!/usr/bin/env python3
+import os
+import tempfile
+import shutil
+from pathlib import Path
+from datetime import datetime
+from dotenv import load_dotenv
+from fastapi import FastAPI, HTTPException, BackgroundTasks
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+import uvicorn
+# Heavy third-party dependencies: abort start-up with a readable message
+# instead of a raw traceback when either package is not installed.
+try:
+    from huggingface_hub import hf_hub_download, upload_file, list_repo_files
+    import whisper
+except ImportError as e:
+    print(f"Missing dependency: {e}")
+    # `exit` is a helper injected by the `site` module and is missing under
+    # `python -S` or in frozen builds; SystemExit is always available.
+    raise SystemExit(1)
+# Load environment variables (values from a local .env file, if present,
+# are merged into the process environment before HF_TOKEN is read).
+load_dotenv()
+HF_TOKEN = os.getenv("HF_TOKEN")
+if not HF_TOKEN:
+    print("Error: HF_TOKEN not found in .env file")
+    # `exit` comes from the `site` module and is not guaranteed to exist;
+    # raising SystemExit terminates with the same status code everywhere.
+    raise SystemExit(1)
+app = FastAPI(title="Movie Transcription Service")
+# In-memory job tracking: job_id -> dict holding "status", the submitted
+# "dataset_link"/"model_size", and, as the job advances, "repo_id",
+# "filename", "transcript_path" or "error".  Contents are lost on restart.
+jobs = {}
+class TranscriptionRequest(BaseModel):  # JSON body accepted by POST /transcribe
+    dataset_link: str  # Hugging Face dataset URL or "owner/repo/filename" string, parsed by extract_dataset_info
+    model_size: str = "small"  # Whisper model name handed to whisper.load_model
+def format_timestamp(seconds: float) -> str:
+    """Render a duration in seconds as a zero-padded ``HH:MM:SS`` string.
+
+    Fractions of a second are dropped.  Hours are not wrapped at 24, so long
+    durations simply produce a wider hour field.
+    """
+    whole_hours, within_hour = divmod(seconds, 3600)
+    whole_minutes = within_hour // 60
+    leftover_seconds = seconds % 60
+    fields = (int(whole_hours), int(whole_minutes), int(leftover_seconds))
+    return ":".join(f"{field:02d}" for field in fields)
+def transcribe_with_timestamps(video_path: str, model_size: str) -> str:
+    """Run Whisper on a media file and return a timestamped transcript.
+
+    The returned text opens with a banner and a "Generated:" line, followed
+    by one ``[HH:MM:SS] text`` line per segment whose text is non-empty.  If
+    the Whisper result has no "segments" key, its "text" value (or an empty
+    string) is appended instead.
+    """
+    print(f"Loading Whisper model: {model_size}")
+    whisper_model = whisper.load_model(model_size)
+    print(f"Transcribing audio from: {video_path}")
+    output = whisper_model.transcribe(video_path)
+
+    # Header block that precedes the transcript body
+    rule = "=" * 80
+    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+    lines = [
+        rule,
+        "MOVIE TRANSCRIPTION WITH TIMESTAMPS",
+        rule,
+        f"Generated: {generated_at}",
+        "",
+    ]
+
+    # Without per-segment data only the untimed text can be emitted
+    if "segments" not in output:
+        lines.append(output.get("text", ""))
+        return "\n".join(lines)
+
+    for seg in output["segments"]:
+        stamp = format_timestamp(seg["start"])
+        spoken = seg["text"].strip()
+        if not spoken:
+            continue
+        lines.append(f"[{stamp}] {spoken}")
+    return "\n".join(lines)
+def extract_dataset_info(dataset_link: str) -> tuple:
+    """Extract ``(repo_id, filename)`` from a dataset link.
+
+    Accepted forms:
+
+    * Hugging Face URLs such as
+      ``https://huggingface.co/datasets/<owner>/<repo>/blob/<rev>/<path>``
+      (``resolve`` and ``raw`` are handled like ``blob``).  Query strings and
+      fragments are ignored and percent-escapes in the path are decoded.
+      If none of those markers follows the repo, the last path segment is
+      used as the filename.
+    * Plain ``owner/repo/path/to/file`` strings.
+
+    Args:
+        dataset_link: URL or ``owner/repo/filename`` string; surrounding
+            whitespace is ignored.
+
+    Returns:
+        A ``(repo_id, filename)`` tuple where ``repo_id`` is ``owner/repo``
+        and ``filename`` is the file path inside the repository.
+
+    Raises:
+        ValueError: If owner, repo or filename cannot be determined.
+    """
+    # Examples:
+    # https://huggingface.co/datasets/factorstudios/movs/blob/main/Captain.America.Brave.New.World.(NKIRI.COM).2025.mkv
+    # factorstudios/movs/Captain.America.Brave.New.World.(NKIRI.COM).2025.mkv
+    from urllib.parse import unquote, urlsplit
+
+    link = dataset_link.strip()
+    if "huggingface.co" in link:
+        # urlsplit only separates host from path when a scheme is present
+        url = link if "://" in link else f"https://{link}"
+        # Use the path component only, so "?download=true" or "#anchor"
+        # never ends up in the filename; drop empty segments from stray "/"
+        segments = [unquote(seg) for seg in urlsplit(url).path.split("/") if seg]
+        if "datasets" in segments:
+            tail = segments[segments.index("datasets") + 1:]
+            # Need at least owner, repo and one more segment for the file
+            if len(tail) >= 3:
+                owner, repo, *rest = tail
+                if rest[0] in ("blob", "resolve", "raw"):
+                    # Skip the marker and the revision that follows it
+                    rest = rest[2:]
+                else:
+                    rest = rest[-1:]
+                filename = "/".join(rest)
+                if filename:
+                    return f"{owner}/{repo}", filename
+    else:
+        # Assume it's in format: owner/repo/filename
+        parts = link.split("/")
+        if len(parts) >= 3:
+            owner, repo = parts[0], parts[1]
+            filename = "/".join(parts[2:])
+            if owner and repo and filename:
+                return f"{owner}/{repo}", filename
+    raise ValueError(f"Cannot parse dataset link: {link}")
+def _run_transcription_job(job_id: str, dataset_link: str, model_size: str) -> None:
+    """Blocking download -> transcribe -> upload pipeline for one job.
+
+    Progress is recorded in ``jobs[job_id]["status"]`` as "extracting_info",
+    "downloading", "transcribing", "uploading" and finally "completed" or
+    "failed".  Exceptions are not propagated: they are stored as text under
+    ``jobs[job_id]["error"]``.
+    """
+    # setdefault keeps the failure handler below from raising KeyError when
+    # the job id was never registered.
+    job = jobs.setdefault(job_id, {})
+    try:
+        job["status"] = "extracting_info"
+        # Parse dataset link
+        repo_id, filename = extract_dataset_info(dataset_link)
+        job["repo_id"] = repo_id
+        job["filename"] = filename
+        base_name = os.path.basename(filename)
+
+        job["status"] = "downloading"
+        print(f"Downloading {filename} from {repo_id}...")
+        local_path = hf_hub_download(
+            repo_id=repo_id,
+            filename=filename,
+            repo_type="dataset",
+            token=HF_TOKEN,
+        )
+
+        job["status"] = "transcribing"
+        print("Starting transcription...")
+        # The downloaded file is read in place; duplicating a whole movie
+        # into a temp directory only doubled disk usage.
+        transcript = transcribe_with_timestamps(local_path, model_size)
+
+        # Prepare transcript file
+        transcript_filename = os.path.splitext(base_name)[0] + ".transcript.txt"
+        repo_upload_path = f"transcriptions/{transcript_filename}"
+        temp_dir = tempfile.mkdtemp()
+        try:
+            transcript_path = os.path.join(temp_dir, transcript_filename)
+            with open(transcript_path, "w", encoding="utf-8") as f:
+                f.write(transcript)
+            job["status"] = "uploading"
+            print("Uploading transcript to dataset...")
+            # Upload transcript to transcriptions folder
+            upload_file(
+                path_or_fileobj=transcript_path,
+                path_in_repo=repo_upload_path,
+                repo_id=repo_id,
+                repo_type="dataset",
+                token=HF_TOKEN,
+                commit_message=f"Add transcription for {base_name}",
+            )
+        finally:
+            # Best-effort cleanup of the temp directory
+            shutil.rmtree(temp_dir, ignore_errors=True)
+
+        job["status"] = "completed"
+        job["transcript_path"] = repo_upload_path
+        print(f"✓ Transcription completed and uploaded to {repo_upload_path}")
+    except Exception as e:
+        # Top-level boundary of the background job: record the failure so
+        # the status endpoint can report it instead of losing it.
+        job["status"] = "failed"
+        job["error"] = str(e) or repr(e)
+        print(f"✗ Error: {e}")
+async def process_transcription(job_id: str, dataset_link: str, model_size: str):
+    """Background task to process transcription and upload.
+
+    The download, Whisper inference and upload are all blocking calls.
+    Running them directly inside this coroutine would stall the event loop,
+    and with it every /status request, until the job finished.  The work is
+    therefore handed to the loop's default thread pool and merely awaited
+    here.
+
+    Args:
+        job_id: Key of the job entry in ``jobs``.
+        dataset_link: Link understood by ``extract_dataset_info``.
+        model_size: Whisper model name.
+    """
+    import asyncio
+
+    loop = asyncio.get_running_loop()
+    await loop.run_in_executor(
+        None, _run_transcription_job, job_id, dataset_link, model_size
+    )
+@app.get("/", response_class=HTMLResponse)
+async def serve_ui():
+    """Serve the transcription UI.
+
+    Returns a self-contained HTML page (inline CSS and JavaScript).  The
+    script posts the form to /transcribe and then polls /status/{job_id}
+    every two seconds until the job reports "completed" or "failed".
+
+    Status messages are inserted as text nodes rather than via innerHTML:
+    error text and paths come from the server and can contain arbitrary
+    user-supplied characters, so they must never be parsed as markup.
+    """
+    return """
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <meta charset="UTF-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Movie Transcription Service</title>
+        <style>
+            * {
+                margin: 0;
+                padding: 0;
+                box-sizing: border-box;
+            }
+            body {
+                font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                min-height: 100vh;
+                display: flex;
+                align-items: center;
+                justify-content: center;
+                padding: 20px;
+            }
+            .container {
+                background: white;
+                border-radius: 12px;
+                box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
+                max-width: 600px;
+                width: 100%;
+                padding: 40px;
+            }
+            .header {
+                text-align: center;
+                margin-bottom: 30px;
+            }
+            .header h1 {
+                color: #333;
+                font-size: 28px;
+                margin-bottom: 10px;
+            }
+            .header p {
+                color: #666;
+                font-size: 14px;
+            }
+            .form-group {
+                margin-bottom: 20px;
+            }
+            label {
+                display: block;
+                margin-bottom: 8px;
+                color: #333;
+                font-weight: 500;
+                font-size: 14px;
+            }
+            input, select {
+                width: 100%;
+                padding: 12px;
+                border: 2px solid #e0e0e0;
+                border-radius: 6px;
+                font-size: 14px;
+                transition: border-color 0.3s;
+            }
+            input:focus, select:focus {
+                outline: none;
+                border-color: #667eea;
+            }
+            button {
+                width: 100%;
+                padding: 12px;
+                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                color: white;
+                border: none;
+                border-radius: 6px;
+                font-size: 16px;
+                font-weight: 600;
+                cursor: pointer;
+                transition: transform 0.2s;
+            }
+            button:hover {
+                transform: translateY(-2px);
+            }
+            button:disabled {
+                opacity: 0.6;
+                cursor: not-allowed;
+                transform: none;
+            }
+            .status-section {
+                margin-top: 30px;
+                padding-top: 30px;
+                border-top: 2px solid #f0f0f0;
+            }
+            .status-item {
+                display: none;
+                padding: 16px;
+                border-radius: 6px;
+                margin-bottom: 12px;
+                font-size: 14px;
+            }
+            .status-item.active {
+                display: block;
+            }
+            .status-item.info {
+                background: #e3f2fd;
+                color: #1976d2;
+                border-left: 4px solid #1976d2;
+            }
+            .status-item.success {
+                background: #e8f5e9;
+                color: #388e3c;
+                border-left: 4px solid #388e3c;
+            }
+            .status-item.error {
+                background: #ffebee;
+                color: #d32f2f;
+                border-left: 4px solid #d32f2f;
+            }
+            .spinner {
+                display: inline-block;
+                width: 12px;
+                height: 12px;
+                border: 2px solid #ccc;
+                border-top-color: #1976d2;
+                border-radius: 50%;
+                animation: spin 0.6s linear infinite;
+                margin-right: 8px;
+            }
+            @keyframes spin {
+                to { transform: rotate(360deg); }
+            }
+            .job-id {
+                font-family: 'Courier New', monospace;
+                font-size: 12px;
+                color: #999;
+                margin-top: 8px;
+                word-break: break-all;
+            }
+        </style>
+    </head>
+    <body>
+        <div class="container">
+            <div class="header">
+                <h1>🎬 Movie Transcription Service</h1>
+                <p>Download, transcribe, and upload movie transcriptions with timestamps</p>
+            </div>
+            <form id="transcriptionForm">
+                <div class="form-group">
+                    <label for="datasetLink">Dataset Link or URL</label>
+                    <input
+                        type="text"
+                        id="datasetLink"
+                        placeholder="e.g., https://huggingface.co/datasets/factorstudios/movs/blob/main/movie.mkv"
+                        required
+                    >
+                </div>
+                <div class="form-group">
+                    <label for="modelSize">Whisper Model Size</label>
+                    <select id="modelSize">
+                        <option value="tiny">Tiny (Fast)</option>
+                        <option value="base">Base</option>
+                        <option value="small" selected>Small (Recommended)</option>
+                        <option value="medium">Medium</option>
+                        <option value="large">Large (Slow but Accurate)</option>
+                    </select>
+                </div>
+                <button type="submit" id="submitBtn">Start Transcription</button>
+            </form>
+            <div class="status-section" id="statusSection" style="display: none;">
+                <div id="statusMessages"></div>
+                <div class="job-id" id="jobId"></div>
+            </div>
+        </div>
+        <script>
+            const form = document.getElementById('transcriptionForm');
+            const statusSection = document.getElementById('statusSection');
+            const statusMessages = document.getElementById('statusMessages');
+            const jobId = document.getElementById('jobId');
+            const submitBtn = document.getElementById('submitBtn');
+            form.addEventListener('submit', async (e) => {
+                e.preventDefault();
+                const datasetLink = document.getElementById('datasetLink').value;
+                const modelSize = document.getElementById('modelSize').value;
+                submitBtn.disabled = true;
+                statusSection.style.display = 'block';
+                statusMessages.innerHTML = '';
+                try {
+                    // Submit transcription request
+                    const response = await fetch('/transcribe', {
+                        method: 'POST',
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({
+                            dataset_link: datasetLink,
+                            model_size: modelSize
+                        })
+                    });
+                    if (!response.ok) {
+                        throw new Error(await response.text());
+                    }
+                    const data = await response.json();
+                    const currentJobId = data.job_id;
+                    jobId.textContent = `Job ID: ${currentJobId}`;
+                    addStatus('info', 'Transcription started...', true, true);
+                    // Poll for status updates
+                    let completed = false;
+                    while (!completed) {
+                        await new Promise(resolve => setTimeout(resolve, 2000));
+                        const statusResponse = await fetch(`/status/${currentJobId}`);
+                        if (!statusResponse.ok) {
+                            // e.g. 404 after a server restart wiped the in-memory job list
+                            throw new Error(`Status check failed (HTTP ${statusResponse.status})`);
+                        }
+                        const statusData = await statusResponse.json();
+                        const status = statusData.status;
+                        if (status === 'completed') {
+                            addStatus('success', '✓ Transcription completed and uploaded!');
+                            addStatus('info', `📁 File: ${statusData.transcript_path}`);
+                            completed = true;
+                        } else if (status === 'failed') {
+                            addStatus('error', `✗ Error: ${statusData.error}`);
+                            completed = true;
+                        } else {
+                            const statusText = status.charAt(0).toUpperCase() + status.slice(1).replace(/_/g, ' ');
+                            addStatus('info', `${statusText}...`, true, true);
+                        }
+                    }
+                } catch (error) {
+                    addStatus('error', `✗ Error: ${error.message}`);
+                } finally {
+                    submitBtn.disabled = false;
+                }
+            });
+            function addStatus(type, message, replace = false, spinner = false) {
+                if (replace) {
+                    statusMessages.innerHTML = '';
+                }
+                const div = document.createElement('div');
+                div.className = `status-item active ${type}`;
+                if (spinner) {
+                    const spin = document.createElement('span');
+                    spin.className = 'spinner';
+                    div.appendChild(spin);
+                }
+                // Insert as text so server-provided strings are never parsed as HTML
+                div.appendChild(document.createTextNode(message));
+                statusMessages.appendChild(div);
+                statusMessages.parentElement.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
+            }
+        </script>
+    </body>
+    </html>
+    """
+@app.post("/transcribe")
+async def start_transcription(request: TranscriptionRequest, background_tasks: BackgroundTasks):
+    """Register a new transcription job and schedule it in the background.
+
+    A fresh UUID4 string identifies the job.  Its entry in ``jobs`` starts
+    with status "queued", and the id is returned as ``{"job_id": ...}`` so
+    the client can poll /status/{job_id}.
+    """
+    from uuid import uuid4
+
+    link = request.dataset_link
+    size = request.model_size
+    new_id = str(uuid4())
+    # Record the job before scheduling so a status lookup always finds it
+    jobs[new_id] = {"status": "queued", "dataset_link": link, "model_size": size}
+    background_tasks.add_task(process_transcription, new_id, link, size)
+    return JSONResponse({"job_id": new_id})
+@app.get("/status/{job_id}")
+async def get_status(job_id: str):
+    """Return the stored record of a transcription job as JSON.
+
+    Responds with HTTP 404 ("Job not found") for an unknown job id.
+    """
+    record = jobs.get(job_id)
+    if record is None:
+        raise HTTPException(status_code=404, detail="Job not found")
+    return JSONResponse(record)
+if __name__ == "__main__":
+    # Script entry point: print the start-up banner, then serve the app on
+    # all interfaces at port 7860.
+    for banner_line in (
+        "Starting Movie Transcription Service...",
+        "Open http://localhost:7860 in your browser",
+    ):
+        print(banner_line)
+    uvicorn.run(app, host="0.0.0.0", port=7860)