Spaces:

Offex
/

Transcripttiktok

Running

App Files Files Community

Transcripttiktok / app.py

Offex

Update app.py

2d3b613 verified 2 months ago

raw

history blame

4.46 kB

	import gradio as gr
	import yt_dlp
	import os
	import shutil
	from faster_whisper import WhisperModel

	# --- 1. Model Setup (Turbo Settings) ---
	# Module-level cache: stays None until load_model() builds the model.
	model = None


	def load_model():
	    """Return the shared Whisper model, constructing it on the first call.

	    The instance is stored in the module-level ``model`` variable, so later
	    calls return the same object without loading it again.

	    Returns:
	        The cached ``WhisperModel`` instance.
	    """
	    global model

	    # Fast path: a previous call already built the model.
	    if model is not None:
	        return model

	    print("📥 Loading Whisper Model (Base + Turbo Settings)...")
	    # "base" weights on CPU with int8 quantization for a speed/accuracy balance.
	    model = WhisperModel("base", device="cpu", compute_type="int8")
	    print("✅ Model Loaded!")
	    return model

	# --- 2. Logic: Download Audio from URL ---
	def download_audio_from_url(url, output_path="downloaded_audio", ffmpeg_dir="/usr/bin"):
	    """Download the audio of ``url`` with yt-dlp and extract it as an MP3 file.

	    Args:
	        url: Address of the media page to download. Surrounding whitespace
	            is ignored.
	        output_path: Output file stem (without extension) handed to yt-dlp
	            as ``outtmpl``.
	        ffmpeg_dir: Directory handed to yt-dlp as ``ffmpeg_location``. The
	            default is the system path used on Hugging Face.

	    Returns:
	        The string ``f"{output_path}.mp3"``. The file's existence is not
	        verified here.

	    Raises:
	        RuntimeError: If ``url`` is empty or yt-dlp fails. The message always
	            starts with ``"URL Download Error: "`` and the original
	            exception, if any, is chained as ``__cause__``.
	    """
	    # Fail fast on blank input instead of handing it to yt-dlp.
	    if not isinstance(url, str) or not url.strip():
	        raise RuntimeError("URL Download Error: no URL provided")
	    url = url.strip()

	    mp3_path = f"{output_path}.mp3"

	    # Cleanup old files. Besides the previous MP3, also drop a leftover
	    # extension-less file: an interrupted run may leave the un-converted
	    # download behind (presumably under the bare outtmpl name; confirm
	    # against yt-dlp behavior).
	    for stale in (mp3_path, output_path):
	        if os.path.isfile(stale):
	            os.remove(stale)

	    ydl_opts = {
	        'format': 'bestaudio/best',
	        'outtmpl': output_path,
	        'ffmpeg_location': ffmpeg_dir,
	        'postprocessors': [{
	            'key': 'FFmpegExtractAudio',
	            'preferredcodec': 'mp3',
	            'preferredquality': '192',
	        }],
	        'quiet': True,
	        'no_warnings': True,
	        # NOTE(review): this appears to disable TLS certificate verification;
	        # confirm it is still required before keeping it.
	        'nocheckcertificate': True,
	        'http_headers': {
	            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
	            'Referer': 'https://www.tiktok.com/'
	        }
	    }

	    try:
	        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	            ydl.download([url])
	    except Exception as e:
	        # Keep the user-facing prefix and chain the real cause for debugging.
	        raise RuntimeError(f"URL Download Error: {str(e)}") from e
	    return mp3_path

	# --- 3. Main Transcribe Function (Handles Both) ---
	def transcribe_media(url_input, file_input):
	    """Transcribe an uploaded file or the audio behind a URL.

	    The uploaded file takes priority when both inputs are supplied.

	    Args:
	        url_input: Media URL as typed by the user, or ``None``/blank.
	        file_input: Path of an uploaded audio file, or ``None``.

	    Returns:
	        The transcript text, or a human-readable message starting with
	        ``"⚠️ Error:"`` / ``"❌ Error:"`` when nothing could be transcribed.
	        Exceptions are never propagated to the caller.
	    """
	    # Set only when this call downloaded the audio itself, so the cleanup
	    # below never deletes a file the user uploaded.
	    downloaded_path = None

	    try:
	        # CASE 1: File Upload
	        if file_input is not None:
	            print(f"📂 Processing Uploaded File: {file_input}")
	            audio_file_path = file_input

	        # CASE 2: URL Input
	        elif url_input and url_input.strip():
	            # Pass on the trimmed URL, not the raw textbox value.
	            cleaned_url = url_input.strip()
	            print(f"🔗 Processing URL: {cleaned_url}")
	            downloaded_path = download_audio_from_url(cleaned_url)
	            audio_file_path = downloaded_path

	        else:
	            return "⚠️ Error: Please provide either a Link or Upload a File."

	        # --- Transcribe ---
	        if not os.path.exists(audio_file_path):
	            return "❌ Error: File not found."

	        current_model = load_model()

	        # Turbo Settings: beam_size=1 (Fast), vad_filter=True (Skip Silence)
	        segments, _ = current_model.transcribe(
	            audio_file_path,
	            beam_size=1,
	            vad_filter=True,
	        )

	        # Consume every segment inside the try block so the audio file is
	        # still present while it is read. Each piece is trimmed before
	        # joining to avoid doubled spaces (segment texts presumably carry
	        # their own leading whitespace; confirm against faster-whisper).
	        pieces = (segment.text.strip() for segment in segments)
	        return " ".join(piece for piece in pieces if piece)

	    except Exception as e:
	        # UI boundary: report the failure as text instead of crashing the app.
	        return f"❌ Error: {str(e)}"

	    finally:
	        # Remove the temporary download so it does not linger on disk.
	        if downloaded_path is not None and os.path.isfile(downloaded_path):
	            try:
	                os.remove(downloaded_path)
	            except OSError as cleanup_error:
	                print(f"⚠️ Could not remove {downloaded_path}: {cleanup_error}")

	# --- 4. Turbo UI with Tabs ---
	# Custom stylesheet handed to gr.Blocks(css=...) below.
	#   .container          - set on the top-level column via elem_classes="container".
	#   .gr-button-primary  - presumably Gradio's class for primary buttons; verify
	#                         against the installed Gradio version.
	#   .tab-nav            - presumably Gradio's class for the tab header row; verify.
	css = """
	.container {max-width: 900px; margin: auto;}
	.gr-button-primary {background: linear-gradient(90deg, #1CB5E0 0%, #000851 100%); border: none; color: white;}
	.tab-nav {font-weight: bold; font-size: 1.1em;}
	"""

	with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
	    # Layout: heading, one tab per input source, then a shared output panel.
	    with gr.Column(elem_classes="container"):
	        gr.Markdown("# 🚀 Turbo Transcriber (Link & Upload)")
	        gr.Markdown("Paste a TikTok link OR upload an Audio/Video file to get the text.")

	        with gr.Tabs():
	            # TAB 1: paste a link
	            with gr.TabItem("🔗 Paste Link"):
	                url_in = gr.Textbox(
	                    label="TikTok / YouTube URL",
	                    placeholder="https://...",
	                )
	                btn_url = gr.Button("🚀 Transcribe Link", variant="primary")

	            # TAB 2: upload or record a file
	            with gr.TabItem("📂 Upload File"):
	                file_in = gr.Audio(
	                    label="Upload Audio or Video",
	                    type="filepath",
	                    sources=["upload", "microphone"],
	                )
	                btn_file = gr.Button("📂 Transcribe File", variant="primary")

	        # Output area shared by both tabs.
	        transcript_out = gr.Code(
	            label="Transcript Result",
	            language="markdown",
	            interactive=False,
	            lines=15,
	        )

	    # --- Button Actions ---
	    # Both buttons call the same handler; the input that does not apply is
	    # filled with a constant None carried by a gr.State.
	    _link_inputs = [url_in, gr.State(None)]  # URL supplied, no file
	    btn_url.click(fn=transcribe_media, inputs=_link_inputs, outputs=transcript_out)

	    _file_inputs = [gr.State(None), file_in]  # no URL, file supplied
	    btn_file.click(fn=transcribe_media, inputs=_file_inputs, outputs=transcript_out)

	demo.launch()