"""TikTok Content Analyzer.

A small Gradio app that downloads a TikTok video with yt-dlp, extracts its
audio track with ffmpeg, transcribes the audio with Whisper and assigns the
transcript to a content category with a zero-shot classifier.
"""

import functools
import os
import re
import subprocess
import uuid
from urllib.parse import urlsplit

import gradio as gr
import whisper
import yt_dlp
from transformers import pipeline

# Fixed working-file names shared by every step of the pipeline.
TEMP_VIDEO_PATH = "temp_video.mp4"
TEMP_AUDIO_PATH = "temp_audio.mp3"

# Candidate categories offered to the zero-shot classifier.
CANDIDATE_LABELS = [
    "educational",
    "entertainment",
    "news",
    "political",
    "religious",
    "technical",
    "advertisement",
    "social",
]


# Delete temporary files
def clean_temp_files():
    """Remove the temporary video and audio files if they exist.

    Cleanup is best-effort: a missing file is ignored, and any other OS
    error is printed rather than raised so it cannot mask the result of the
    request being processed.
    """
    for path in (TEMP_VIDEO_PATH, TEMP_AUDIO_PATH):
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
        except OSError as e:
            print(f"Cleanup error: {e}")


# Download TikTok video
def download_video(video_url):
    """Download ``video_url`` to ``temp_video.mp4`` using yt-dlp.

    Returns:
        The path of the downloaded file, or ``None`` when the download
        fails or produces no file.
    """
    # NOTE(review): confirm that 'skip_watermark' is an extractor argument
    # yt-dlp's TikTok extractor actually recognises.
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
        'outtmpl': TEMP_VIDEO_PATH,
        'quiet': True,
        'no_warnings': True,
        'extractor_args': {'tiktok': {'skip_watermark': True}},
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
    except Exception as e:
        print(f"Download error: {e}")
        return None
    # A download that raised nothing may still have written no file.
    return TEMP_VIDEO_PATH if os.path.exists(TEMP_VIDEO_PATH) else None


# Extract audio (temporary)
def extract_audio(video_path):
    """Extract the audio track of ``video_path`` into ``temp_audio.mp3``.

    ffmpeg is invoked with an argument list (no shell), so the path is never
    interpreted by a shell.

    Returns:
        The audio file path, or ``None`` when ffmpeg is unavailable, exits
        with a non-zero status, or produces no output file.
    """
    command = [
        "ffmpeg",
        "-i", video_path,
        "-vn",
        "-acodec", "libmp3lame",
        "-q:a", "3",
        TEMP_AUDIO_PATH,
        "-y",
    ]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as e:
        # Raised, for example, when the ffmpeg executable is not installed.
        print(f"Audio extraction error: {e}")
        return None
    if completed.returncode != 0:
        print(f"Audio extraction error: ffmpeg exited with {completed.returncode}")
        return None
    return TEMP_AUDIO_PATH if os.path.exists(TEMP_AUDIO_PATH) else None


@functools.lru_cache(maxsize=1)
def _get_whisper_model():
    """Load the Whisper "base" model once and reuse it across requests."""
    return whisper.load_model("base")


@functools.lru_cache(maxsize=1)
def _get_classifier():
    """Build the zero-shot classification pipeline once and reuse it."""
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


# Transcribe audio
def transcribe_audio(audio_path):
    """Transcribe ``audio_path`` with Whisper.

    Returns:
        The transcribed text, or ``None`` if loading the model or
        transcribing fails.
    """
    try:
        result = _get_whisper_model().transcribe(audio_path)
        return result['text']
    except Exception as e:
        print(f"Transcription error: {e}")
        return None


# Classify content
def classify_content(text):
    """Classify ``text`` into one of ``CANDIDATE_LABELS``.

    Returns:
        A ``(label, score)`` tuple for the top-ranked label, or
        ``(None, None)`` when ``text`` is empty/blank or classification
        fails.
    """
    if not text or not text.strip():
        return None, None
    try:
        result = _get_classifier()(
            text,
            candidate_labels=CANDIDATE_LABELS,
            hypothesis_template="This text is about {}.",
        )
        return result['labels'][0], result['scores'][0]
    except Exception as e:
        print(f"Classification error: {e}")
        return None, None


def _is_tiktok_url(video_url):
    """Return True when the host of ``video_url`` is tiktok.com or a subdomain.

    The check is made on the parsed hostname rather than on the raw string,
    so a URL that merely contains "tiktok.com" in its path or query is
    rejected. A URL given without a scheme is treated as https.
    """
    candidate = video_url.strip()
    if "://" not in candidate:
        candidate = "https://" + candidate
    try:
        host = urlsplit(candidate).hostname
    except ValueError:
        return False
    if not host:
        return False
    host = host.lower()
    return host == "tiktok.com" or host.endswith(".tiktok.com")


# Main processing function
def process_video(video_url):
    """Run the full pipeline for one TikTok URL.

    Returns:
        A ``(transcription, classification)`` pair of strings for the two
        output text boxes. On failure the first element carries an error
        message and the second is empty, except when only classification
        fails: then the transcription is returned together with a
        classification error message.
    """
    clean_temp_files()

    if not video_url or not video_url.strip():
        return "Please enter a valid TikTok URL", ""

    url = video_url.strip()
    if not _is_tiktok_url(url):
        return "This app is for TikTok links only", ""

    try:
        # Download video
        video_path = download_video(url)
        if not video_path:
            return "Failed to download video", ""

        # Extract audio
        audio_path = extract_audio(video_path)
        if not audio_path:
            return "Failed to extract audio", ""

        # Transcribe
        transcription = transcribe_audio(audio_path)
        if not transcription:
            return "Failed to transcribe audio", ""

        # Classify
        category, confidence = classify_content(transcription)
        if not category:
            return transcription, "Failed to classify content"

        # Format classification result
        classification_result = f"{category} (confidence: {confidence:.2f})"
        return transcription, classification_result
    finally:
        # Clean up on every exit path, including unexpected exceptions.
        clean_temp_files()


# Gradio interface
with gr.Blocks(title="TikTok Content Analyzer") as demo:
    gr.Markdown(
        "# 🎬 TikTok Content Analyzer\n"
        "Enter a TikTok video URL to get transcription and content classification\n"
    )

    with gr.Row():
        url_input = gr.Textbox(
            label="TikTok URL",
            placeholder="Enter TikTok video URL here...",
        )

    with gr.Row():
        transcription_output = gr.Textbox(
            label="Transcription",
            interactive=True,
            lines=10,
            max_lines=20,
        )

    with gr.Row():
        category_output = gr.Textbox(
            label="Content Category",
            interactive=False,
        )

    submit_btn = gr.Button("Analyze Video", variant="primary")

    # Examples
    gr.Examples(
        examples=[
            ["https://www.tiktok.com/@example/video/123456789"],
            ["https://vm.tiktok.com/ZMexample/"],
        ],
        inputs=url_input,
    )

    submit_btn.click(
        fn=process_video,
        inputs=url_input,
        outputs=[transcription_output, category_output],
    )


if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)