Spaces:

ayloll
/

VideoToTexttik

Sleeping

App Files Files Community

VideoToTexttik / app.py

ayloll

Update app.py

1d234a6 verified 7 months ago

raw

history blame contribute delete

4.51 kB

	import gradio as gr
	from transformers import pipeline
	import yt_dlp
	import whisper
	import os
	import uuid
	import re

	# Delete temporary files
	def clean_temp_files():
	    """Remove the scratch media files used by the processing pipeline.

	    Deletes ``temp_video.mp4`` and ``temp_audio.mp3`` from the current
	    working directory. A file that is already absent is skipped silently,
	    so the function can be called repeatedly.
	    """
	    import contextlib

	    # EAFP: attempt the delete and ignore "already gone" instead of testing
	    # os.path.exists() first; the file could disappear between the check
	    # and the removal because the names are fixed and shared.
	    for path in ("temp_video.mp4", "temp_audio.mp3"):
	        with contextlib.suppress(FileNotFoundError):
	            os.remove(path)

	# Download TikTok video
	def download_video(video_url):
	    """Download *video_url* with yt-dlp into ``temp_video.mp4``.

	    Returns the string ``"temp_video.mp4"`` when yt-dlp finishes without
	    raising. Any exception is printed as ``Download error: ...`` and
	    ``None`` is returned instead.
	    """
	    output_path = "temp_video.mp4"
	    # Prefer separate mp4 video + m4a audio streams, falling back to the
	    # best single mp4 file; yt-dlp console output is silenced.
	    options = dict(
	        format='bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
	        outtmpl=output_path,
	        quiet=True,
	        no_warnings=True,
	        extractor_args={'tiktok': {'skip_watermark': True}},
	    )

	    try:
	        with yt_dlp.YoutubeDL(options) as downloader:
	            downloader.download([video_url])
	    except Exception as e:
	        print(f"Download error: {e}")
	        return None
	    return output_path

	# Extract audio (temporary)
	def extract_audio(video_path):
	    """Extract the audio track of *video_path* into ``temp_audio.mp3``.

	    Runs ``ffmpeg`` to drop the video stream (``-vn``) and encode the audio
	    with ``libmp3lame`` at ``-q:a 3``, overwriting any existing output
	    (``-y``).

	    Args:
	        video_path: Path of the input video file.

	    Returns:
	        ``"temp_audio.mp3"`` when ffmpeg exits successfully and the file
	        exists; ``None`` when *video_path* is empty, ffmpeg cannot be
	        started, ffmpeg exits with a non-zero status, or no output file
	        was produced.
	    """
	    import subprocess

	    if not video_path:
	        return None

	    audio_path = "temp_audio.mp3"
	    # Pass an argument list instead of a shell string so that quotes or
	    # shell metacharacters in video_path cannot be interpreted as commands.
	    command = [
	        "ffmpeg", "-y",
	        "-i", video_path,
	        "-vn",
	        "-acodec", "libmp3lame",
	        "-q:a", "3",
	        audio_path,
	    ]
	    try:
	        completed = subprocess.run(command)
	    except OSError as e:
	        # Raised e.g. when the ffmpeg executable is not installed.
	        print(f"Audio extraction error: {e}")
	        return None

	    # A non-zero exit status means the output, if any, cannot be trusted.
	    if completed.returncode != 0:
	        return None
	    return audio_path if os.path.exists(audio_path) else None

	# Transcribe audio
	def transcribe_audio(audio_path):
	    """Transcribe the audio file at *audio_path* with the Whisper "base" model.

	    Returns the ``'text'`` entry of the transcription result. If loading the
	    model or transcribing raises, the error is printed as
	    ``Transcription error: ...`` and ``None`` is returned.
	    """
	    try:
	        # NOTE(review): the model is loaded again on every call; consider
	        # reusing one instance if sharing it across requests is safe.
	        transcript = whisper.load_model("base").transcribe(audio_path)['text']
	    except Exception as e:
	        print(f"Transcription error: {e}")
	        return None
	    return transcript

	# Classify content
	def classify_content(text):
	    """Classify *text* against a fixed list of eight topic labels.

	    Builds a ``zero-shot-classification`` pipeline backed by
	    ``facebook/bart-large-mnli`` and returns the first entry of the
	    result's ``'labels'`` and ``'scores'`` lists as ``(label, score)``.

	    Returns ``(None, None)`` when *text* is empty or whitespace-only, or
	    when anything raises (the error is printed as
	    ``Classification error: ...``).
	    """
	    try:
	        # Nothing to classify for missing or blank input.
	        if not (text and text.strip()):
	            return None, None

	        topics = [
	            "educational", "entertainment", "news", "political",
	            "religious", "technical", "advertisement", "social",
	        ]

	        # NOTE(review): the pipeline is constructed on every call.
	        zero_shot = pipeline(
	            "zero-shot-classification",
	            model="facebook/bart-large-mnli",
	        )
	        ranked = zero_shot(
	            text,
	            candidate_labels=topics,
	            hypothesis_template="This text is about {}.",
	        )

	        best_label = ranked['labels'][0]
	        best_score = ranked['scores'][0]
	        return best_label, best_score
	    except Exception as e:
	        print(f"Classification error: {e}")
	        return None, None

	# URL validation helper for process_video
	def _is_tiktok_url(url):
	    """Return True when *url* is an http(s) or scheme-less link whose host
	    is ``tiktok.com`` or one of its subdomains (e.g. ``vm.tiktok.com``)."""
	    from urllib.parse import urlsplit

	    candidate = url.strip()
	    # Scheme-less input ("www.tiktok.com/...") only yields a host when it
	    # is parsed as a network-path reference, i.e. prefixed with "//".
	    if "://" not in candidate:
	        candidate = "//" + candidate
	    try:
	        parts = urlsplit(candidate)
	        host = parts.hostname or ""
	    except ValueError:
	        return False
	    if parts.scheme not in ("", "http", "https"):
	        return False
	    host = host.rstrip(".")
	    return host == "tiktok.com" or host.endswith(".tiktok.com")

	# Main processing function
	def process_video(video_url):
	    """Download a TikTok video, transcribe its audio and classify the text.

	    Args:
	        video_url: URL entered by the user.

	    Returns:
	        A ``(transcription, classification)`` pair of strings for the two
	        output boxes. On failure the first element carries the error
	        message and the second is empty, except when only classification
	        fails: then the transcription is returned together with
	        ``"Failed to classify content"``.
	    """
	    if not video_url or not video_url.strip():
	        return "Please enter a valid TikTok URL", ""

	    video_url = video_url.strip()

	    # Validate the host rather than searching for a substring, so links
	    # such as "https://example.com/?x=tiktok.com" are rejected.
	    if not _is_tiktok_url(video_url):
	        return "This app is for TikTok links only", ""

	    try:
	        # Start from a clean slate in case an earlier run left files behind.
	        clean_temp_files()

	        # Download video
	        video_path = download_video(video_url)
	        if not video_path:
	            return "Failed to download video", ""

	        # Extract audio
	        audio_path = extract_audio(video_path)
	        if not audio_path:
	            return "Failed to extract audio", ""

	        # Transcribe
	        transcription = transcribe_audio(audio_path)
	        if not transcription:
	            return "Failed to transcribe audio", ""

	        # Classify
	        category, confidence = classify_content(transcription)
	        if not category:
	            return transcription, "Failed to classify content"

	        # Format classification result
	        classification_result = f"{category} (confidence: {confidence:.2f})"
	        return transcription, classification_result
	    finally:
	        # Runs on every exit path, including unexpected exceptions, so the
	        # temporary media files never outlive the request.
	        clean_temp_files()

	# Gradio interface: one URL input, two text outputs and a submit button.
	with gr.Blocks(title="TikTok Content Analyzer") as demo:
	    # Page header. The continuation lines of the literal stay at their
	    # original column so the Markdown text itself is unchanged.
	    gr.Markdown("""
	# 🎬 TikTok Content Analyzer
	Enter a TikTok video URL to get transcription and content classification
	""")

	    # Input row
	    with gr.Row():
	        url_input = gr.Textbox(
	            label="TikTok URL",
	            placeholder="Enter TikTok video URL here...",
	        )

	    # Transcript row (left editable by the user)
	    with gr.Row():
	        transcription_output = gr.Textbox(
	            label="Transcription",
	            interactive=True,
	            lines=10,
	            max_lines=20,
	        )

	    # Classification row (read-only)
	    with gr.Row():
	        category_output = gr.Textbox(
	            label="Content Category",
	            interactive=False,
	        )

	    submit_btn = gr.Button("Analyze Video", variant="primary")

	    # Examples that fill the URL box when clicked
	    gr.Examples(
	        examples=[
	            ["https://www.tiktok.com/@example/video/123456789"],
	            ["https://vm.tiktok.com/ZMexample/"],
	        ],
	        inputs=url_input,
	    )

	    # Clicking the button runs the pipeline and fills both output boxes.
	    submit_btn.click(
	        fn=process_video,
	        inputs=url_input,
	        outputs=[transcription_output, category_output],
	    )

	# Listen on all interfaces at port 7860 when run as a script.
	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0", server_port=7860)