Spaces:

factorstudios
/

trans

Running

App Files Files Community

trans / transcription_server.py

factorstudios

Update transcription_server.py

e5e57a4 verified 2 days ago

raw

history blame contribute delete

19 kB

	#!/usr/bin/env python3
	import os
	import tempfile
	import shutil
	from pathlib import Path
	from datetime import datetime
	from dotenv import load_dotenv

	from fastapi import FastAPI, HTTPException, BackgroundTasks
	from fastapi.responses import HTMLResponse, JSONResponse
	from fastapi.staticfiles import StaticFiles
	from pydantic import BaseModel
	import uvicorn

	# Heavy third-party dependencies are imported defensively so that a missing
	# package produces a short, readable message instead of a traceback.
	try:
	    from huggingface_hub import hf_hub_download, upload_file, list_repo_files
	    import whisper
	except ImportError as e:
	    print(f"Missing dependency: {e}")
	    # ``exit()`` is a helper injected by the ``site`` module for interactive
	    # sessions and is not guaranteed to exist; SystemExit always is.
	    raise SystemExit(1)

	# Load environment variables (values from a local .env file are merged into
	# the process environment before the lookup below).
	load_dotenv()
	HF_TOKEN = os.getenv("HF_TOKEN")
	if not HF_TOKEN:
	    # The token is passed to every Hub download/upload call in this module,
	    # so there is no point starting the server without it.
	    print("Error: HF_TOKEN not found in .env file")
	    raise SystemExit(1)

	# ASGI application object; the route decorators further down register
	# their handlers on it, and it is what uvicorn serves.
	app = FastAPI(title="Movie Transcription Service")

	# In-memory job tracking: maps job_id -> dict holding "status" plus
	# per-job details (dataset_link, model_size, repo_id, filename, error, ...).
	# Nothing in the visible code removes entries, and the data lives only in
	# this process, so it is lost on restart.
	jobs = {}

	class TranscriptionRequest(BaseModel):
	    """JSON body accepted by the ``POST /transcribe`` endpoint."""

	    # Hugging Face dataset URL, or an ``owner/repo/path`` reference, that
	    # identifies the video file to transcribe.
	    dataset_link: str

	    # Whisper checkpoint name handed to ``whisper.load_model``.
	    model_size: str = "small"

	def format_timestamp(seconds: float) -> str:
	    """Render a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

	    Fractional seconds are dropped, and the hour field is not wrapped at 24,
	    so long durations simply produce a larger hour count.
	    """
	    whole_hours, leftover = divmod(seconds, 3600)
	    fields = (int(whole_hours), int(leftover // 60), int(seconds % 60))
	    return ":".join(f"{field:02d}" for field in fields)

	def transcribe_with_timestamps(video_path: str, model_size: str) -> str:
	    """Run Whisper on a media file and return a timestamped text transcript.

	    Args:
	        video_path: Path of the media file handed to Whisper.
	        model_size: Name of the Whisper checkpoint to load.

	    Returns:
	        A newline-joined report: a banner, the generation time, a blank line,
	        and then one ``[HH:MM:SS] text`` line per non-empty segment. When the
	        Whisper result has no ``"segments"`` key, the plain ``"text"`` value
	        is emitted instead.
	    """
	    print(f"Loading Whisper model: {model_size}")
	    whisper_model = whisper.load_model(model_size)

	    print(f"Transcribing audio from: {video_path}")
	    output = whisper_model.transcribe(video_path)

	    # Report header.
	    rule = "=" * 80
	    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
	    report = [
	        rule,
	        "MOVIE TRANSCRIPTION WITH TIMESTAMPS",
	        rule,
	        f"Generated: {generated_at}",
	        "",
	    ]

	    if "segments" not in output:
	        # No per-segment data: fall back to the undivided transcript text.
	        report.append(output.get("text", ""))
	    else:
	        # The start time is formatted before the text is inspected, so every
	        # segment is processed the same way whether or not it is kept.
	        stamped = (
	            (format_timestamp(piece["start"]), piece["text"].strip())
	            for piece in output["segments"]
	        )
	        report.extend(f"[{stamp}] {spoken}" for stamp, spoken in stamped if spoken)

	    return "\n".join(report)

	def extract_dataset_info(dataset_link: str) -> tuple:
	    """Split a dataset link into ``(repo_id, filename)``.

	    Two input forms are understood:

	    * a Hugging Face URL such as
	      ``https://huggingface.co/datasets/owner/repo/blob/main/dir/file.mkv``
	      (``resolve`` and ``raw`` URLs are treated like ``blob``), and
	    * a bare path of the form ``owner/repo/dir/file.mkv``.

	    Args:
	        dataset_link: URL or path supplied by the user. Surrounding
	            whitespace is ignored.

	    Returns:
	        A ``(repo_id, filename)`` tuple, where ``repo_id`` is ``"owner/repo"``
	        and ``filename`` is the file's path inside the repository. For URLs
	        the filename is percent-decoded (``My%20Movie.mkv`` becomes
	        ``My Movie.mkv``).

	    Raises:
	        ValueError: If the link is empty, contains one of the rejected
	            markers, or cannot be split into owner, repository and filename.
	    """
	    # Local import keeps this function self-contained.
	    from urllib.parse import unquote

	    link = dataset_link.strip()

	    if not link:
	        raise ValueError("Dataset link cannot be empty")

	    # Presumably guards against a pasted ``KEY=value`` / credentials line
	    # ending up in the link field.
	    # NOTE(review): this also rejects legitimate names that contain "=",
	    # "API" or "TOKEN" (upper case) - confirm that trade-off is intended.
	    rejected_markers = ("=", "\n", "\r", "DASHSCOPE", "API", "TOKEN")
	    if any(marker in link for marker in rejected_markers):
	        raise ValueError(
	            "Invalid dataset link format. Please provide a valid Hugging Face dataset URL or path.\n"
	            "Examples:\n"
	            " https://huggingface.co/datasets/factorstudios/movs/blob/main/movie.mkv\n"
	            " factorstudios/movs/movie.mkv"
	        )

	    parts = link.split("/")

	    if "huggingface.co" in link:
	        if "datasets" in parts:
	            after_datasets = parts[parts.index("datasets") + 1:]
	            if len(after_datasets) < 2 or not after_datasets[0] or not after_datasets[1]:
	                raise ValueError(
	                    "Invalid Hugging Face dataset URL format: missing owner or repository name"
	                )
	            owner, repo = after_datasets[0], after_datasets[1]
	            remainder = after_datasets[2:]

	            if remainder and remainder[0] in ("blob", "resolve", "raw"):
	                # Layout is <view>/<revision>/<path...>; keep the whole path
	                # so files inside sub-directories resolve correctly.
	                filename = "/".join(remainder[2:])
	            else:
	                # Unknown layout: use the last component, but never the
	                # repository name itself when nothing follows it.
	                filename = remainder[-1] if remainder else ""

	            # Browsers percent-encode spaces and other characters in URLs,
	            # while the Hub expects the literal file name.
	            filename = unquote(filename)
	            if not filename:
	                raise ValueError(
	                    "Invalid Hugging Face dataset URL format: No filename found in URL"
	                )
	            return f"{owner}/{repo}", filename
	    elif len(parts) >= 3 and parts[0] and parts[1]:
	        # Bare "owner/repo/filename" form. Requiring a non-empty owner and
	        # repo also stops unrelated URLs ("https://host/...") from being
	        # misread as a repository called "https:/".
	        filename = "/".join(parts[2:])
	        if not filename:
	            raise ValueError("No filename found in path")
	        return f"{parts[0]}/{parts[1]}", filename

	    raise ValueError(
	        "Cannot parse dataset link. Please use:\n"
	        " https://huggingface.co/datasets/owner/repo/blob/main/file.mkv\n"
	        " or: owner/repo/file.mkv"
	    )

	def _run_transcription_job(job_id: str, dataset_link: str, model_size: str) -> None:
	    """Blocking pipeline for one job: download, transcribe, upload.

	    Progress and results are recorded in ``jobs[job_id]``. Any exception is
	    caught and stored as ``status = "failed"`` plus an ``error`` message, so
	    this function does not raise for pipeline failures.
	    """
	    try:
	        jobs[job_id]["status"] = "extracting_info"

	        # Parse and validate dataset link
	        try:
	            repo_id, filename = extract_dataset_info(dataset_link)
	        except ValueError as e:
	            raise ValueError(f"Invalid dataset link: {str(e)}") from e

	        jobs[job_id]["repo_id"] = repo_id
	        jobs[job_id]["filename"] = filename

	        # Scratch directory for the video copy and the transcript file;
	        # removed in the ``finally`` below whatever happens.
	        temp_dir = tempfile.mkdtemp()
	        try:
	            jobs[job_id]["status"] = "downloading"
	            print(f"Downloading {filename} from {repo_id}...")

	            # Download video
	            local_path = hf_hub_download(
	                repo_id=repo_id,
	                filename=filename,
	                repo_type="dataset",
	                token=HF_TOKEN,
	            )

	            # The returned path may be a symlink; resolve it so the copy
	            # below reads the real file.
	            if os.path.islink(local_path):
	                local_path = os.path.realpath(local_path)

	            # Copy to temp location
	            video_path = os.path.join(temp_dir, os.path.basename(filename))
	            shutil.copy2(local_path, video_path)

	            jobs[job_id]["status"] = "transcribing"
	            print("Starting transcription...")

	            # Transcribe with timestamps
	            transcript = transcribe_with_timestamps(video_path, model_size)

	            # Prepare transcript file: "<video name without extension>.transcript.txt"
	            transcript_filename = os.path.splitext(os.path.basename(filename))[0] + ".transcript.txt"
	            transcript_path = os.path.join(temp_dir, transcript_filename)

	            with open(transcript_path, "w", encoding="utf-8") as f:
	                f.write(transcript)

	            jobs[job_id]["status"] = "uploading"
	            print("Uploading transcript to dataset...")

	            # Upload transcript to the "transcriptions" folder of the same dataset
	            repo_upload_path = f"transcriptions/{transcript_filename}"

	            upload_file(
	                path_or_fileobj=transcript_path,
	                path_in_repo=repo_upload_path,
	                repo_id=repo_id,
	                repo_type="dataset",
	                token=HF_TOKEN,
	                commit_message=f"Add transcription for {os.path.basename(filename)}"
	            )

	            jobs[job_id]["status"] = "completed"
	            jobs[job_id]["transcript_path"] = repo_upload_path
	            print(f"✓ Transcription completed and uploaded to {repo_upload_path}")

	        finally:
	            # Cleanup temp directory
	            shutil.rmtree(temp_dir, ignore_errors=True)

	    except Exception as e:
	        # Top-level boundary of the job: surface the failure through the job
	        # record, which is what the status endpoint reports to the client.
	        jobs[job_id]["status"] = "failed"
	        jobs[job_id]["error"] = str(e)
	        print(f"✗ Error: {e}")


	async def process_transcription(job_id: str, dataset_link: str, model_size: str):
	    """Background task to process transcription and upload.

	    The download, Whisper transcription and upload are all blocking calls.
	    Running them directly inside this coroutine would occupy the event loop
	    for the whole job and stall every other request, including the status
	    polling done by the UI. The work is therefore handed to a worker thread
	    and merely awaited here.

	    Args:
	        job_id: Key of the job's record in ``jobs``; the record must exist.
	        dataset_link: URL or ``owner/repo/path`` reference to the video.
	        model_size: Whisper checkpoint name.
	    """
	    import asyncio

	    loop = asyncio.get_running_loop()
	    await loop.run_in_executor(
	        None, _run_transcription_job, job_id, dataset_link, model_size
	    )

	@app.get("/", response_class=HTMLResponse)
	async def serve_ui():
	    """Serve the single-page transcription UI.

	    Returns:
	        The HTML document as a string. Its inline script POSTs the form to
	        ``/transcribe`` and then polls ``/status/{job_id}`` every two seconds
	        until the job reports ``completed`` or ``failed``.

	    Values that come back from the server (error text, transcript path,
	    status name) are HTML-escaped in the script before being placed into
	    ``innerHTML``; only the fixed spinner markup is inserted as raw HTML.
	    """
	    return """
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Movie Transcription Service</title>
	<style>
	* {
	margin: 0;
	padding: 0;
	box-sizing: border-box;
	}

	body {
	font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	min-height: 100vh;
	display: flex;
	align-items: center;
	justify-content: center;
	padding: 20px;
	}

	.container {
	background: white;
	border-radius: 12px;
	box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
	max-width: 600px;
	width: 100%;
	padding: 40px;
	}

	.header {
	text-align: center;
	margin-bottom: 30px;
	}

	.header h1 {
	color: #333;
	font-size: 28px;
	margin-bottom: 10px;
	}

	.header p {
	color: #666;
	font-size: 14px;
	}

	.form-group {
	margin-bottom: 20px;
	}

	label {
	display: block;
	margin-bottom: 8px;
	color: #333;
	font-weight: 500;
	font-size: 14px;
	}

	input, select {
	width: 100%;
	padding: 12px;
	border: 2px solid #e0e0e0;
	border-radius: 6px;
	font-size: 14px;
	transition: border-color 0.3s;
	}

	input:focus, select:focus {
	outline: none;
	border-color: #667eea;
	}

	button {
	width: 100%;
	padding: 12px;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	border: none;
	border-radius: 6px;
	font-size: 16px;
	font-weight: 600;
	cursor: pointer;
	transition: transform 0.2s;
	}

	button:hover {
	transform: translateY(-2px);
	}

	button:disabled {
	opacity: 0.6;
	cursor: not-allowed;
	transform: none;
	}

	.status-section {
	margin-top: 30px;
	padding-top: 30px;
	border-top: 2px solid #f0f0f0;
	}

	.status-item {
	display: none;
	padding: 16px;
	border-radius: 6px;
	margin-bottom: 12px;
	font-size: 14px;
	}

	.status-item.active {
	display: block;
	}

	.status-item.info {
	background: #e3f2fd;
	color: #1976d2;
	border-left: 4px solid #1976d2;
	}

	.status-item.success {
	background: #e8f5e9;
	color: #388e3c;
	border-left: 4px solid #388e3c;
	}

	.status-item.error {
	background: #ffebee;
	color: #d32f2f;
	border-left: 4px solid #d32f2f;
	}

	.spinner {
	display: inline-block;
	width: 12px;
	height: 12px;
	border: 2px solid #ccc;
	border-top-color: #1976d2;
	border-radius: 50%;
	animation: spin 0.6s linear infinite;
	margin-right: 8px;
	}

	@keyframes spin {
	to { transform: rotate(360deg); }
	}

	.job-id {
	font-family: 'Courier New', monospace;
	font-size: 12px;
	color: #999;
	margin-top: 8px;
	word-break: break-all;
	}
	</style>
	</head>
	<body>
	<div class="container">
	<div class="header">
	<h1>🎬 Movie Transcription Service</h1>
	<p>Download, transcribe, and upload movie transcriptions with timestamps</p>
	</div>

	<form id="transcriptionForm">
	<div class="form-group">
	<label for="datasetLink">Dataset Link or URL</label>
	<input
	type="text"
	id="datasetLink"
	placeholder="https://huggingface.co/datasets/factorstudios/movs/blob/main/movie.mkv"
	title="Enter a Hugging Face dataset URL or path (owner/repo/filename.mkv)"
	required
	>
	<small style="display: block; margin-top: 6px; color: #999; font-size: 12px;">
	Format: https://huggingface.co/datasets/owner/repo/blob/main/filename.mkv<br>
	or: owner/repo/filename.mkv
	</small>
	</div>

	<div class="form-group">
	<label for="modelSize">Whisper Model Size</label>
	<select id="modelSize">
	<option value="tiny">Tiny (Fast)</option>
	<option value="base">Base</option>
	<option value="small" selected>Small (Recommended)</option>
	<option value="medium">Medium</option>
	<option value="large">Large (Slow but Accurate)</option>
	</select>
	</div>

	<button type="submit" id="submitBtn">Start Transcription</button>
	</form>

	<div class="status-section" id="statusSection" style="display: none;">
	<div id="statusMessages"></div>
	<div class="job-id" id="jobId"></div>
	</div>
	</div>

	<script>
	const form = document.getElementById('transcriptionForm');
	const statusSection = document.getElementById('statusSection');
	const statusMessages = document.getElementById('statusMessages');
	const jobId = document.getElementById('jobId');
	const submitBtn = document.getElementById('submitBtn');

	form.addEventListener('submit', async (e) => {
	e.preventDefault();

	const datasetLink = document.getElementById('datasetLink').value;
	const modelSize = document.getElementById('modelSize').value;

	submitBtn.disabled = true;
	statusSection.style.display = 'block';
	statusMessages.innerHTML = '';

	try {
	// Submit transcription request
	const response = await fetch('/transcribe', {
	method: 'POST',
	headers: { 'Content-Type': 'application/json' },
	body: JSON.stringify({
	dataset_link: datasetLink,
	model_size: modelSize
	})
	});

	if (!response.ok) {
	throw new Error(await response.text());
	}

	const data = await response.json();
	const currentJobId = data.job_id;
	jobId.textContent = `Job ID: ${currentJobId}`;

	addStatus('info', '<span class="spinner"></span>Transcription started...', true);

	// Poll for status updates
	let completed = false;
	while (!completed) {
	await new Promise(resolve => setTimeout(resolve, 2000));

	const statusResponse = await fetch(`/status/${currentJobId}`);
	if (!statusResponse.ok) {
	throw new Error(`Status check failed (HTTP ${statusResponse.status})`);
	}
	const statusData = await statusResponse.json();

	const status = statusData.status;

	if (status === 'completed') {
	addStatus('success', '✓ Transcription completed and uploaded!');
	addStatus('info', `📁 File: ${escapeHtml(statusData.transcript_path)}`);
	completed = true;
	} else if (status === 'failed') {
	addStatus('error', `✗ Error: ${escapeHtml(statusData.error)}`);
	completed = true;
	} else {
	const statusText = status.charAt(0).toUpperCase() + status.slice(1).replace(/_/g, ' ');
	addStatus('info', `<span class="spinner"></span>${escapeHtml(statusText)}...`, true);
	}
	}
	} catch (error) {
	addStatus('error', `✗ Error: ${escapeHtml(error.message)}`);
	} finally {
	submitBtn.disabled = false;
	}
	});

	function addStatus(type, message, replace = false) {
	if (replace) {
	statusMessages.innerHTML = '';
	}
	const div = document.createElement('div');
	div.className = `status-item active ${type}`;
	div.innerHTML = message;
	statusMessages.appendChild(div);
	statusMessages.parentElement.scrollIntoView({ behavior: 'smooth', block: 'nearest' });
	}

	// Escape a dynamic value so it is shown as text, never parsed as markup.
	function escapeHtml(value) {
	const holder = document.createElement('div');
	holder.textContent = String(value);
	return holder.innerHTML;
	}
	</script>
	</body>
	</html>
	"""

	@app.post("/transcribe")
	async def start_transcription(request: TranscriptionRequest, background_tasks: BackgroundTasks):
	    """Register a new transcription job and schedule it to run in the background.

	    A fresh UUID becomes the job id, a ``"queued"`` record is stored in
	    ``jobs``, and ``process_transcription`` is added to the request's
	    background tasks. The response is ``{"job_id": <id>}``.
	    """
	    from uuid import uuid4

	    new_job_id = str(uuid4())
	    link, size = request.dataset_link, request.model_size

	    # Record the job before scheduling so the status endpoint can see it
	    # as soon as the client receives the id.
	    jobs[new_job_id] = dict(status="queued", dataset_link=link, model_size=size)
	    background_tasks.add_task(process_transcription, new_job_id, link, size)

	    return JSONResponse({"job_id": new_job_id})

	@app.get("/status/{job_id}")
	async def get_status(job_id: str):
	    """Return the stored record of a transcription job as JSON.

	    Raises:
	        HTTPException: 404 with detail "Job not found" when ``job_id`` has
	            no entry in ``jobs``.
	    """
	    record = jobs.get(job_id)
	    # Every stored record is a dict, so ``None`` can only mean "unknown id".
	    if record is None:
	        raise HTTPException(status_code=404, detail="Job not found")
	    return JSONResponse(record)

	if __name__ == "__main__":
	    # Script entry point: announce where the UI lives, then serve the app
	    # on all interfaces at port 7860.
	    for startup_line in (
	        "Starting Movie Transcription Service...",
	        "Open http://localhost:7860 in your browser",
	    ):
	        print(startup_line)
	    uvicorn.run(app, port=7860, host="0.0.0.0")