Spaces:

ayloll
/

VideoToTexttik

Sleeping

App Files Files Community

ayloll committed on Jun 20, 2025

Commit

1d234a6

verified ·

1 Parent(s): 230bcec

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -105

app.py CHANGED Viewed

@@ -3,130 +3,121 @@ from transformers import pipeline
 import yt_dlp
 import whisper
 import os
 import re
-import tempfile
-import traceback
-# Initialize the models up front to improve performance
-whisper_model = whisper.load_model("base")
-classifier = pipeline("zero-shot-classification",
-                     model="facebook/bart-large-mnli")
-# Content categories
-CONTENT_LABELS = [
-    "educational", "entertainment", "news", "political",
-    "religious", "technical", "advertisement", "social"
-]
-def process_video(video_url):
-    """Main function for processing the video"""
-    try:
-        # Validate the URL
-        if not is_valid_tiktok_url(video_url):
-            return "Invalid TikTok URL", ""
-        # Create temporary files
-        with tempfile.NamedTemporaryFile(suffix='.mp4') as video_file, \
-             tempfile.NamedTemporaryFile(suffix='.mp3') as audio_file:
-            # Download the video
-            if not download_video(video_url, video_file.name):
-                return "Failed to download video", ""
-            # Extract the audio
-            if not extract_audio(video_file.name, audio_file.name):
-                return "Failed to extract audio", ""
-            # Convert the audio to text
-            transcription = transcribe_audio(audio_file.name)
-            if not transcription:
-                return "Failed to transcribe audio", ""
-            # Classify the content
-            category, confidence = classify_content(transcription)
-            # Return the results
-            if category:
-                return transcription, f"{category} (confidence: {confidence:.2f})"
-            return transcription, "Classification failed"
-    except Exception as e:
-        print(f"Error: {str(e)}\n{traceback.format_exc()}")
-        return f"Processing error: {str(e)}", ""
-def is_valid_tiktok_url(url):
-    """Validate a TikTok URL"""
-    return bool(re.match(
-        r'^https?://(www\.|vm\.)?tiktok\.com/.+',
-        url,
-        re.IGNORECASE
-    ))
-def download_video(url, output_path):
-    """Download a TikTok video"""
     try:
         ydl_opts = {
-            'format': 'best[ext=mp4]',
-            'outtmpl': output_path,
             'quiet': True,
             'no_warnings': True,
-            'extractor_args': {'tiktok': {'skip_watermark': True}},
-            'socket_timeout': 10,
-            'retries': 3
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([url])
-        return os.path.exists(output_path)
-    except:
-        return False
-def extract_audio(video_path, audio_path):
-    """Extract the audio from the video"""
-    try:
-        os.system(
-            f"ffmpeg -i \"{video_path}\" "
-            f"-vn -acodec libmp3lame -q:a 2 \"{audio_path}\" -y"
-        )
-        return os.path.exists(audio_path)
-    except:
-        return False
 def transcribe_audio(audio_path):
-    """Convert the audio to text"""
     try:
-        result = whisper_model.transcribe(audio_path)
-        return result.get('text', '')
-    except:
-        return ""
 def classify_content(text):
-    """Classify the content"""
     try:
-        if not text.strip():
             return None, None
-        result = classifier(
-            text,
-            candidate_labels=CONTENT_LABELS,
-            hypothesis_template="This text is about {}."
-        )
         return result['labels'][0], result['scores'][0]
-    except:
         return None, None
-# واجهة Gradio
 with gr.Blocks(title="TikTok Content Analyzer") as demo:
     gr.Markdown("""
     # 🎬 TikTok Content Analyzer
-    Analyze any TikTok video to get transcription and content classification
     """)
     with gr.Row():
         url_input = gr.Textbox(
-            label="TikTok Video URL",
-            placeholder="Paste TikTok link here...",
-            max_lines=1
         )
     with gr.Row():
@@ -143,16 +134,15 @@ with gr.Blocks(title="TikTok Content Analyzer") as demo:
             interactive=False
         )
-    submit_btn = gr.Button("Analyze", variant="primary")
-    # أمثلة
     gr.Examples(
         examples=[
             ["https://www.tiktok.com/@example/video/123456789"],
             ["https://vm.tiktok.com/ZMexample/"]
         ],
-        inputs=url_input,
-        label="Example TikTok URLs"
     )
     submit_btn.click(
@@ -161,10 +151,5 @@ with gr.Blocks(title="TikTok Content Analyzer") as demo:
         outputs=[transcription_output, category_output]
     )
-# تشغيل التطبيق
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True
-    )

 import yt_dlp
 import whisper
 import os
+import uuid
 import re
def clean_temp_files():
    """Delete the scratch media files used while processing a video.

    Removes ``temp_video.mp4`` and ``temp_audio.mp3`` from the current
    working directory. A file that is not present is silently skipped, so
    the function is safe to call both before and after a processing run.
    """
    for path in ("temp_video.mp4", "temp_audio.mp3"):
        # EAFP: remove directly instead of "exists, then remove". The
        # check-then-act form can raise if the file disappears in between.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
def download_video(video_url):
    """Download a TikTok video to ``temp_video.mp4`` in the working directory.

    Args:
        video_url: URL handed to yt-dlp.

    Returns:
        The path ``"temp_video.mp4"`` when that file exists after the
        download, otherwise ``None``. Failures are printed, not raised.
    """
    output_path = "temp_video.mp4"
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
        'outtmpl': output_path,
        'quiet': True,
        'no_warnings': True,
        'extractor_args': {'tiktok': {'skip_watermark': True}},
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
    except Exception as e:
        print(f"Download error: {e}")
        return None

    # Do not report success just because no exception was raised: callers
    # pass the returned path straight to ffmpeg, so confirm the file exists.
    if not os.path.exists(output_path):
        print(f"Download error: {output_path} was not created")
        return None
    return output_path
def extract_audio(video_path):
    """Extract the audio track of a video into ``temp_audio.mp3``.

    Runs ``ffmpeg`` with an argument list (no shell), so a path containing
    quotes or shell metacharacters cannot alter the command.

    Args:
        video_path: Path of the video file to read.

    Returns:
        The path ``"temp_audio.mp3"`` when ffmpeg exits successfully and the
        file exists, otherwise ``None``.
    """
    import subprocess  # local import keeps this block self-contained

    if not video_path:
        return None

    audio_path = "temp_audio.mp3"
    command = [
        "ffmpeg", "-i", video_path,
        "-vn", "-acodec", "libmp3lame", "-q:a", "3",
        audio_path, "-y",
    ]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as e:
        # Raised when the ffmpeg executable is missing or cannot be started.
        print(f"Audio extraction error: {e}")
        return None

    # Check the exit status so an old temp_audio.mp3 left on disk is not
    # mistaken for the output of this (failed) run.
    if completed.returncode != 0:
        print(f"Audio extraction error: ffmpeg exited with {completed.returncode}")
        return None
    return audio_path if os.path.exists(audio_path) else None
# Whisper model, loaded on first use and then reused by every later call.
_whisper_model = None


def transcribe_audio(audio_path):
    """Transcribe an audio file to text with the Whisper "base" model.

    The model is loaded once and cached at module level; loading it again
    on every request repeats the most expensive step for no benefit.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text, or ``None`` when loading the model or
        transcribing fails (the error is printed).
    """
    global _whisper_model
    try:
        if _whisper_model is None:
            _whisper_model = whisper.load_model("base")
        result = _whisper_model.transcribe(audio_path)
        return result['text']
    except Exception as e:
        print(f"Transcription error: {e}")
        return None
# Zero-shot classification pipeline, built on first use and then reused.
_zero_shot_classifier = None


def classify_content(text):
    """Classify text into one of the app's content categories.

    Uses zero-shot classification with ``facebook/bart-large-mnli``. The
    pipeline is created once and cached at module level instead of being
    rebuilt on every call.

    Args:
        text: The transcription to classify.

    Returns:
        A ``(label, score)`` tuple for the top-ranked category, or
        ``(None, None)`` when the text is empty/blank or classification
        fails (the error is printed).
    """
    global _zero_shot_classifier
    try:
        if not text or not text.strip():
            return None, None
        if _zero_shot_classifier is None:
            _zero_shot_classifier = pipeline("zero-shot-classification",
                                             model="facebook/bart-large-mnli")
        labels = ["educational", "entertainment", "news", "political",
                  "religious", "technical", "advertisement", "social"]
        result = _zero_shot_classifier(
            text,
            candidate_labels=labels,
            hypothesis_template="This text is about {}.",
        )
        return result['labels'][0], result['scores'][0]
    except Exception as e:
        print(f"Classification error: {e}")
        return None, None
# Matches URLs whose host is tiktok.com or one of its subdomains
# (www., vm., vt., m., ...). The scheme is optional. Anchoring on the host
# rejects URLs that merely contain "tiktok.com" in a path or query string.
_TIKTOK_URL_RE = re.compile(
    r'^(?:https?://)?(?:[a-z0-9-]+\.)*tiktok\.com(?::\d+)?(?:[/?#]|$)',
    re.IGNORECASE,
)


def _is_tiktok_url(url):
    """Return True when the host part of *url* is tiktok.com or a subdomain."""
    return bool(_TIKTOK_URL_RE.match(url))


def process_video(video_url):
    """Download, transcribe and classify a TikTok video.

    Args:
        video_url: The URL entered by the user.

    Returns:
        A ``(transcription, classification)`` tuple of strings for the two
        output boxes. On failure the first element is an error message and
        the second is empty. If only classification fails, the
        transcription is still returned with a failure note.
    """
    if not video_url or not video_url.strip():
        return "Please enter a valid TikTok URL", ""
    video_url = video_url.strip()
    if not _is_tiktok_url(video_url):
        return "This app is for TikTok links only", ""

    # Start from a clean slate, and always clean up afterwards, including on
    # early returns and unexpected exceptions.
    clean_temp_files()
    try:
        video_path = download_video(video_url)
        if not video_path:
            return "Failed to download video", ""

        audio_path = extract_audio(video_path)
        if not audio_path:
            return "Failed to extract audio", ""

        transcription = transcribe_audio(audio_path)
        if not transcription:
            return "Failed to transcribe audio", ""

        category, confidence = classify_content(transcription)
        if not category:
            return transcription, "Failed to classify content"

        return transcription, f"{category} (confidence: {confidence:.2f})"
    finally:
        clean_temp_files()
+# Gradio interface
 with gr.Blocks(title="TikTok Content Analyzer") as demo:
     gr.Markdown("""
     # 🎬 TikTok Content Analyzer
+    Enter a TikTok video URL to get transcription and content classification
     """)
     with gr.Row():
         url_input = gr.Textbox(
+            label="TikTok URL",
+            placeholder="Enter TikTok video URL here..."
         )
     with gr.Row():
             interactive=False
         )
+    submit_btn = gr.Button("Analyze Video", variant="primary")
+    # Examples
     gr.Examples(
         examples=[
             ["https://www.tiktok.com/@example/video/123456789"],
             ["https://vm.tiktok.com/ZMexample/"]
         ],
+        inputs=url_input
     )
     submit_btn.click(
         outputs=[transcription_output, category_output]
     )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)