Spaces:

ayloll
/

VideoToTexttik

Sleeping

App Files Community

ayloll committed on Jun 20, 2025

Commit

230bcec

verified ·

1 Parent(s): 2d2cc6d

Update app.py

Browse files

Files changed (1) hide show

app.py +148 -74

app.py CHANGED Viewed

@@ -1,96 +1,170 @@
 import gradio as gr
-from fastapi import FastAPI, HTTPException
 import yt_dlp
 import whisper
-import requests
 import os
 import re
-import asyncio
-from urllib.parse import urlparse
-app = FastAPI()
-# دالة لتحويل الروابط القصيرة
-def expand_tiktok_url(url):
-    try:
-        if not re.match(r'^https?://(www\.)?tiktok\.com', url):
-            session = requests.Session()
-            resp = session.head(url, allow_redirects=True, timeout=10)
-            return resp.url
-        return url
-    except:
-        return url
-# دالة لاستخراج ID الفيديو
-def get_video_id(url):
-    parsed = urlparse(url)
-    if 'tiktok.com' not in parsed.netloc:
-        return None
-    path_parts = parsed.path.split('/')
-    if 'video' in path_parts:
-        return path_parts[path_parts.index('video') + 1]
-    return None
-# دالة المعالجة الرئيسية
-async def process_video(url):
     try:
-        # 1. تحويل الروابط القصيرة
-        final_url = expand_tiktok_url(url)
-        # 2. التحقق من صحة الرابط
-        video_id = get_video_id(final_url)
-        if not video_id:
-            raise HTTPException(status_code=400, detail="Invalid TikTok URL")
-        # 3. تنزيل الفيديو
         ydl_opts = {
             'format': 'best[ext=mp4]',
-            'outtmpl': f'{video_id}.mp4',
             'quiet': True,
-            'extractor_args': {'tiktok': {'skip_watermark': True}}
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(final_url, download=True)
-            video_path = ydl.prepare_filename(info)
-        # 4. استخراج الصوت
-        audio_path = f'{video_id}.mp3'
-        os.system(f'ffmpeg -i "{video_path}" -vn -ar 16000 -ac 1 "{audio_path}"')
-        # 5. تحويل الصوت لنص
-        model = whisper.load_model("base")
-        result = model.transcribe(audio_path)
-        # 6. تنظيف الملفات المؤقتة
-        os.remove(video_path)
-        os.remove(audio_path)
-        return {
-            "transcription": result["text"],
-            "video_id": video_id
-        }
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-# واجهة API
-@app.get("/analyze")
-async def analyze(url: str):
-    return await process_video(url)
-# واجهة Gradio (اختيارية)
-with gr.Blocks() as demo:
-    gr.Markdown("## TikTok Analyzer")
-    url_input = gr.Textbox(label="TikTok URL")
-    output_text = gr.Textbox(label="Transcription")
-    def analyze_gradio(url):
-        result = process_video(url)
-        return result["transcription"]
-    demo.launch(server_name="0.0.0.0", server_port=7860)
-# للدمج مع FastAPI
-app = gr.mount_gradio_app(app, demo, path="/")

 import gradio as gr
+from transformers import pipeline
 import yt_dlp
 import whisper
 import os
 import re
+import tempfile
+import traceback
+# تهيئة النموذج مسبقاً لتحسين الأداء
+whisper_model = whisper.load_model("base")
+classifier = pipeline("zero-shot-classification",
+                     model="facebook/bart-large-mnli")
+# تصنيفات المحتوى
+CONTENT_LABELS = [
+    "educational", "entertainment", "news", "political",
+    "religious", "technical", "advertisement", "social"
+]
+def process_video(video_url):
+    """الدالة الرئيسية لمعالجة الفيديو"""
+    try:
+        # التحقق من صحة الرابط
+        if not is_valid_tiktok_url(video_url):
+            return "Invalid TikTok URL", ""
+        # إنشاء ملفات مؤقتة
+        with tempfile.NamedTemporaryFile(suffix='.mp4') as video_file, \
+             tempfile.NamedTemporaryFile(suffix='.mp3') as audio_file:
+            # تنزيل الفيديو
+            if not download_video(video_url, video_file.name):
+                return "Failed to download video", ""
+            # استخراج الصوت
+            if not extract_audio(video_file.name, audio_file.name):
+                return "Failed to extract audio", ""
+            # تحويل الصوت إلى نص
+            transcription = transcribe_audio(audio_file.name)
+            if not transcription:
+                return "Failed to transcribe audio", ""
+            # تصنيف المحتوى
+            category, confidence = classify_content(transcription)
+            # إرجاع النتائج
+            if category:
+                return transcription, f"{category} (confidence: {confidence:.2f})"
+            return transcription, "Classification failed"
+    except Exception as e:
+        print(f"Error: {str(e)}\n{traceback.format_exc()}")
+        return f"Processing error: {str(e)}", ""
+def is_valid_tiktok_url(url):
+    """التحقق من صحة رابط تيك توك"""
+    return bool(re.match(
+        r'^https?://(www\.|vm\.)?tiktok\.com/.+',
+        url,
+        re.IGNORECASE
+    ))
+def download_video(url, output_path):
+    """تنزيل فيديو تيك توك"""
     try:
         ydl_opts = {
             'format': 'best[ext=mp4]',
+            'outtmpl': output_path,
             'quiet': True,
+            'no_warnings': True,
+            'extractor_args': {'tiktok': {'skip_watermark': True}},
+            'socket_timeout': 10,
+            'retries': 3
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+        return os.path.exists(output_path)
+    except:
+        return False
+def extract_audio(video_path, audio_path):
+    """استخراج الصوت من الفيديو"""
+    try:
+        os.system(
+            f"ffmpeg -i \"{video_path}\" "
+            f"-vn -acodec libmp3lame -q:a 2 \"{audio_path}\" -y"
+        )
+        return os.path.exists(audio_path)
+    except:
+        return False
+def transcribe_audio(audio_path):
+    """تحويل الصوت إلى نص"""
+    try:
+        result = whisper_model.transcribe(audio_path)
+        return result.get('text', '')
+    except:
+        return ""
+def classify_content(text):
+    """تصنيف المحتوى"""
+    try:
+        if not text.strip():
+            return None, None
+        result = classifier(
+            text,
+            candidate_labels=CONTENT_LABELS,
+            hypothesis_template="This text is about {}."
+        )
+        return result['labels'][0], result['scores'][0]
+    except:
+        return None, None
+# واجهة Gradio
+with gr.Blocks(title="TikTok Content Analyzer") as demo:
+    gr.Markdown("""
+    # 🎬 TikTok Content Analyzer
+    Analyze any TikTok video to get transcription and content classification
+    """)
+    with gr.Row():
+        url_input = gr.Textbox(
+            label="TikTok Video URL",
+            placeholder="Paste TikTok link here...",
+            max_lines=1
+        )
+    with gr.Row():
+        transcription_output = gr.Textbox(
+            label="Transcription",
+            interactive=True,
+            lines=10,
+            max_lines=20
+        )
+    with gr.Row():
+        category_output = gr.Textbox(
+            label="Content Category",
+            interactive=False
+        )
+    submit_btn = gr.Button("Analyze", variant="primary")
+    # أمثلة
+    gr.Examples(
+        examples=[
+            ["https://www.tiktok.com/@example/video/123456789"],
+            ["https://vm.tiktok.com/ZMexample/"]
+        ],
+        inputs=url_input,
+        label="Example TikTok URLs"
+    )
+    submit_btn.click(
+        fn=process_video,
+        inputs=url_input,
+        outputs=[transcription_output, category_output]
+    )
+# تشغيل التطبيق
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
+    )