Spaces:

ayloll
/

VideoToTexttik

Sleeping

App Files Files Community

ayloll committed on Jun 20, 2025

Commit

2d2cc6d

verified ·

1 Parent(s): 525e22b

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -130

app.py CHANGED Viewed

@@ -1,155 +1,96 @@
 import gradio as gr
-from transformers import pipeline
 import yt_dlp
 import whisper
 import os
-import uuid
 import re
def clean_temp_files():
    """Delete the fixed-name scratch files left in the working directory.

    Removes ``temp_video.mp4`` and ``temp_audio.mp3`` if they are present.
    A file that does not exist is skipped silently; any other OS error
    (for example a permission problem) still propagates to the caller.
    """
    for leftover in ("temp_video.mp4", "temp_audio.mp3"):
        # Remove directly instead of exists()-then-remove(): the file can
        # vanish between the two calls, which would raise unexpectedly.
        try:
            os.remove(leftover)
        except FileNotFoundError:
            pass
def download_video(video_url):
    """Download a TikTok video to ``temp_video.mp4`` with yt-dlp.

    Args:
        video_url: URL passed to ``yt_dlp.YoutubeDL.download``.

    Returns:
        The string ``"temp_video.mp4"`` when the download call completes,
        or ``None`` when any exception is raised (the error is printed).
    """
    target = "temp_video.mp4"
    options = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
        'outtmpl': target,
        'quiet': True,
        'no_warnings': True,
        'extractor_args': {'tiktok': {'skip_watermark': True}},
    }
    try:
        with yt_dlp.YoutubeDL(options) as downloader:
            downloader.download([video_url])
    except Exception as err:
        print(f"Download error: {err}")
        return None
    return target
def extract_audio(video_path):
    """Extract the audio track of a video into ``temp_audio.mp3`` via ffmpeg.

    Args:
        video_path: Path of the input video file.

    Returns:
        ``"temp_audio.mp3"`` when ffmpeg exits successfully and the file
        exists; ``None`` when ffmpeg cannot be started, exits with a
        non-zero status, or produces no output file.
    """
    import subprocess  # local import: only this function needs it

    output_path = "temp_audio.mp3"
    # An argument list (no shell) keeps quotes or shell metacharacters in
    # video_path from being interpreted as part of the command line.
    command = [
        "ffmpeg", "-i", video_path,
        "-vn", "-acodec", "libmp3lame", "-q:a", "3",
        output_path, "-y",
    ]
    try:
        completed = subprocess.run(command, check=False)
    except OSError:
        # ffmpeg is not installed or not executable.
        return None
    # Without this check, a stale temp_audio.mp3 from an earlier run would be
    # reported as the result of a conversion that actually failed.
    if completed.returncode != 0:
        return None
    return output_path if os.path.exists(output_path) else None
def transcribe_audio(audio_path):
    """Transcribe an audio file with the Whisper ``base`` model.

    Args:
        audio_path: Path handed to the model's ``transcribe`` method.

    Returns:
        The ``'text'`` entry of the transcription result, or ``None`` when
        loading the model or transcribing raises (the error is printed).
    """
    try:
        whisper_model = whisper.load_model("base")
        transcript = whisper_model.transcribe(audio_path)
        return transcript['text']
    except Exception as err:
        print(f"Transcription error: {err}")
        return None
def classify_content(text):
    """Pick the best-matching content category for a transcript.

    Runs the ``zero-shot-classification`` pipeline with the
    ``facebook/bart-large-mnli`` model over a fixed list of eight labels.

    Args:
        text: Transcript to classify.

    Returns:
        ``(label, score)`` taken from the first entry of the pipeline
        result, or ``(None, None)`` when *text* is empty or whitespace-only,
        or when any exception is raised (the error is printed).
    """
    try:
        # Kept inside the try so a non-string argument still yields
        # (None, None) instead of raising.
        if not text or not text.strip():
            return None, None
        classifier = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli",
        )
        labels = [
            "educational", "entertainment", "news", "political",
            "religious", "technical", "advertisement", "social",
        ]
        result = classifier(
            text,
            candidate_labels=labels,
            hypothesis_template="This text is about {}.",
        )
        return result['labels'][0], result['scores'][0]
    except Exception as e:
        print(f"Classification error: {e}")
        return None, None
def process_video(video_url):
    """Download, transcribe and classify one TikTok video.

    Args:
        video_url: Link entered by the user.

    Returns:
        A ``(transcription, classification)`` pair of strings. On failure the
        first element carries the error message and the second is empty,
        except when only classification fails: then the transcription is
        returned together with ``"Failed to classify content"``.
    """
    clean_temp_files()
    if not video_url or not video_url.strip():
        return "Please enter a valid TikTok URL", ""
    # "vm.tiktok.com" contains "tiktok.com", so one substring test covers both.
    if "tiktok.com" not in video_url:
        return "This app is for TikTok links only", ""

    try:
        video_path = download_video(video_url)
        if not video_path:
            return "Failed to download video", ""

        audio_path = extract_audio(video_path)
        if not audio_path:
            return "Failed to extract audio", ""

        transcription = transcribe_audio(audio_path)
        if not transcription:
            return "Failed to transcribe audio", ""

        category, confidence = classify_content(transcription)
        if not category:
            return transcription, "Failed to classify content"

        return transcription, f"{category} (confidence: {confidence:.2f})"
    finally:
        # Single cleanup point: runs on every exit path, including a failed
        # download and unexpected exceptions.
        clean_temp_files()
# Gradio UI: a URL box, two result boxes, a trigger button and sample links.
with gr.Blocks(title="TikTok Content Analyzer") as demo:
    gr.Markdown("""
    # 🎬 TikTok Content Analyzer
    Enter a TikTok video URL to get transcription and content classification
    """)

    with gr.Row():
        url_input = gr.Textbox(label="TikTok URL", placeholder="Enter TikTok video URL here...")

    with gr.Row():
        transcription_output = gr.Textbox(label="Transcription", interactive=True, lines=10, max_lines=20)

    with gr.Row():
        category_output = gr.Textbox(label="Content Category", interactive=False)

    submit_btn = gr.Button("Analyze Video", variant="primary")

    # Placeholder links shown beneath the form as clickable examples.
    sample_links = [
        ["https://www.tiktok.com/@example/video/123456789"],
        ["https://vm.tiktok.com/ZMexample/"],
    ]
    gr.Examples(examples=sample_links, inputs=url_input)

    # Clicking the button fills both result boxes from process_video.
    submit_btn.click(
        fn=process_video,
        inputs=url_input,
        outputs=[transcription_output, category_output],
    )

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
+from fastapi import FastAPI, HTTPException
 import yt_dlp
 import whisper
+import requests
 import os
 import re
+import asyncio
+from urllib.parse import urlparse
+app = FastAPI()
def expand_tiktok_url(url):
    """Resolve a shortened or redirecting link to its final URL.

    A URL that already starts with ``http(s)://tiktok.com`` or
    ``http(s)://www.tiktok.com`` is returned unchanged without any network
    access. Any other URL is followed with a HEAD request (redirects
    enabled, 10 second timeout).

    Args:
        url: Link supplied by the caller.

    Returns:
        The final URL after redirects, or *url* itself when it is already
        canonical, is not a string, or the request fails.
    """
    if not isinstance(url, str):
        return url
    if re.match(r'^https?://(www\.)?tiktok\.com', url):
        return url
    try:
        # The context manager releases the session's pooled connections.
        with requests.Session() as session:
            resp = session.head(url, allow_redirects=True, timeout=10)
            return resp.url
    except requests.RequestException:
        # Best effort: hand back the input and let validation reject it.
        # Catching only request errors keeps KeyboardInterrupt/SystemExit
        # from being swallowed, which a bare ``except:`` would do.
        return url
def get_video_id(url):
    """Extract the numeric video ID from a TikTok video URL.

    Args:
        url: Fully expanded URL, e.g.
            ``https://www.tiktok.com/@user/video/7234567890123456789``.

    Returns:
        The path segment that follows ``video`` when the host is
        ``tiktok.com`` or one of its subdomains and that segment consists
        only of ASCII digits; otherwise ``None``.
    """
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    # Exact or subdomain match; a substring test would also accept hosts
    # such as "nottiktok.com" or "tiktok.com.evil.net".
    if host != "tiktok.com" and not host.endswith(".tiktok.com"):
        return None

    segments = parsed.path.split('/')
    try:
        candidate = segments[segments.index('video') + 1]
    except (ValueError, IndexError):
        # No "video" segment, or nothing after it.
        return None

    # The ID ends up in file names and command arguments downstream, so
    # anything other than plain digits is rejected.
    if candidate.isascii() and candidate.isdigit():
        return candidate
    return None
def _download_and_transcribe(final_url, video_id):
    """Download the video, extract 16 kHz mono audio and return its transcript.

    Blocking helper for ``process_video``. All intermediate files live in a
    per-call temporary directory that is removed on every exit path.
    """
    import subprocess  # local imports: only this helper needs them
    import tempfile

    with tempfile.TemporaryDirectory() as workdir:
        ydl_opts = {
            'format': 'best[ext=mp4]',
            'outtmpl': os.path.join(workdir, f'{video_id}.mp4'),
            'quiet': True,
            'extractor_args': {'tiktok': {'skip_watermark': True}}
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(final_url, download=True)
            video_path = ydl.prepare_filename(info)

        audio_path = os.path.join(workdir, f'{video_id}.mp3')
        # Argument list instead of a shell string; -y prevents ffmpeg from
        # waiting on an overwrite prompt; check=True turns a failed
        # conversion into an exception instead of a confusing later error.
        subprocess.run(
            ['ffmpeg', '-y', '-i', video_path,
             '-vn', '-ar', '16000', '-ac', '1', audio_path],
            check=True,
        )

        model = whisper.load_model("base")
        result = model.transcribe(audio_path)
    return result["text"]


async def process_video(url):
    """Transcribe the audio of a TikTok video.

    Steps: expand short links, validate the URL, download the video,
    extract its audio, transcribe it with Whisper, and clean up.

    Args:
        url: TikTok link (short or full).

    Returns:
        ``{"transcription": <text>, "video_id": <id>}``.

    Raises:
        HTTPException: status 400 when no video ID can be extracted from the
            URL; status 500 (with the error text as detail) for any other
            failure.
    """
    loop = asyncio.get_running_loop()
    try:
        # Network and CPU heavy steps run in the default executor so the
        # event loop stays free to serve other requests.
        final_url = await loop.run_in_executor(None, expand_tiktok_url, url)

        video_id = get_video_id(final_url)
        if not video_id:
            raise HTTPException(status_code=400, detail="Invalid TikTok URL")

        text = await loop.run_in_executor(
            None, _download_and_transcribe, final_url, video_id
        )
        return {
            "transcription": text,
            "video_id": video_id
        }
    except HTTPException:
        # Keep the intended status code (e.g. 400) instead of masking it
        # as a 500 in the generic handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# HTTP API endpoint
@app.get("/analyze")
async def analyze(url: str):
    """Handle ``GET /analyze?url=...`` by returning what ``process_video`` yields."""
    outcome = await process_video(url)
    return outcome
# Gradio interface (optional)
with gr.Blocks() as demo:
    gr.Markdown("## TikTok Analyzer")
    url_input = gr.Textbox(label="TikTok URL")
    output_text = gr.Textbox(label="Transcription")
    submit_btn = gr.Button("Analyze")

    async def analyze_gradio(url):
        """Return the transcription text for the Gradio output box."""
        # process_video is a coroutine function: it must be awaited before
        # its result can be subscripted.
        try:
            result = await process_video(url)
        except HTTPException as exc:
            # Show the failure in the UI rather than as a bare traceback.
            raise gr.Error(str(exc.detail))
        return result["transcription"]

    # Without an event binding the handler above is never invoked.
    submit_btn.click(fn=analyze_gradio, inputs=url_input, outputs=output_text)

# Launch the standalone Gradio server only when run as a script. Launching
# unconditionally blocks at import time, so an ASGI server importing this
# module would never reach the mount below.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)

# Mount the UI on the FastAPI app so one ASGI application serves both the
# UI at "/" and the /analyze endpoint.
app = gr.mount_gradio_app(app, demo, path="/")