ayloll committed on
Commit
2d2cc6d
·
verified ·
1 Parent(s): 525e22b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -130
app.py CHANGED
@@ -1,155 +1,96 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import yt_dlp
4
  import whisper
 
5
  import os
6
- import uuid
7
  import re
 
 
8
 
9
- # Delete temporary files
10
- def clean_temp_files():
11
- temp_files = ["temp_video.mp4", "temp_audio.mp3"]
12
- for file in temp_files:
13
- if os.path.exists(file):
14
- os.remove(file)
15
 
16
- # Download TikTok video
17
- def download_video(video_url):
18
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  ydl_opts = {
20
- 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
21
- 'outtmpl': 'temp_video.mp4',
22
  'quiet': True,
23
- 'no_warnings': True,
24
  'extractor_args': {'tiktok': {'skip_watermark': True}}
25
  }
26
 
27
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
28
- ydl.download([video_url])
29
- return "temp_video.mp4"
30
- except Exception as e:
31
- print(f"Download error: {e}")
32
- return None
33
-
34
- # Extract audio (temporary)
35
- def extract_audio(video_path):
36
- os.system(f"ffmpeg -i \"{video_path}\" -vn -acodec libmp3lame -q:a 3 \"temp_audio.mp3\" -y")
37
- return "temp_audio.mp3" if os.path.exists("temp_audio.mp3") else None
38
-
39
- # Transcribe audio
40
- def transcribe_audio(audio_path):
41
- try:
42
  model = whisper.load_model("base")
43
  result = model.transcribe(audio_path)
44
- return result['text']
45
- except Exception as e:
46
- print(f"Transcription error: {e}")
47
- return None
48
-
49
- # Classify content
50
- def classify_content(text):
51
- try:
52
- if not text or len(text.strip()) == 0:
53
- return None, None
54
-
55
- classifier = pipeline("zero-shot-classification",
56
- model="facebook/bart-large-mnli")
57
 
58
- labels = ["educational", "entertainment", "news", "political",
59
- "religious", "technical", "advertisement", "social"]
 
60
 
61
- result = classifier(text,
62
- candidate_labels=labels,
63
- hypothesis_template="This text is about {}.")
 
64
 
65
- return result['labels'][0], result['scores'][0]
66
  except Exception as e:
67
- print(f"Classification error: {e}")
68
- return None, None
69
 
70
- # Main processing function
71
- def process_video(video_url):
72
- clean_temp_files()
73
-
74
- if not video_url or len(video_url.strip()) == 0:
75
- return "Please enter a valid TikTok URL", ""
76
-
77
- if "tiktok.com" not in video_url and "vm.tiktok.com" not in video_url:
78
- return "This app is for TikTok links only", ""
79
-
80
- # Download video
81
- video_path = download_video(video_url)
82
- if not video_path:
83
- return "Failed to download video", ""
84
-
85
- # Extract audio
86
- audio_path = extract_audio(video_path)
87
- if not audio_path:
88
- clean_temp_files()
89
- return "Failed to extract audio", ""
90
-
91
- # Transcribe
92
- transcription = transcribe_audio(audio_path)
93
- if not transcription:
94
- clean_temp_files()
95
- return "Failed to transcribe audio", ""
96
-
97
- # Classify
98
- category, confidence = classify_content(transcription)
99
- if not category:
100
- clean_temp_files()
101
- return transcription, "Failed to classify content"
102
-
103
- # Clean up
104
- clean_temp_files()
105
-
106
- # Format classification result
107
- classification_result = f"{category} (confidence: {confidence:.2f})"
108
- return transcription, classification_result
109
 
110
- # Gradio interface
111
- with gr.Blocks(title="TikTok Content Analyzer") as demo:
112
- gr.Markdown("""
113
- # 🎬 TikTok Content Analyzer
114
- Enter a TikTok video URL to get transcription and content classification
115
- """)
116
-
117
- with gr.Row():
118
- url_input = gr.Textbox(
119
- label="TikTok URL",
120
- placeholder="Enter TikTok video URL here..."
121
- )
122
-
123
- with gr.Row():
124
- transcription_output = gr.Textbox(
125
- label="Transcription",
126
- interactive=True,
127
- lines=10,
128
- max_lines=20
129
- )
130
-
131
- with gr.Row():
132
- category_output = gr.Textbox(
133
- label="Content Category",
134
- interactive=False
135
- )
136
-
137
- submit_btn = gr.Button("Analyze Video", variant="primary")
138
 
139
- # Examples
140
- gr.Examples(
141
- examples=[
142
- ["https://www.tiktok.com/@example/video/123456789"],
143
- ["https://vm.tiktok.com/ZMexample/"]
144
- ],
145
- inputs=url_input
146
- )
147
 
148
- submit_btn.click(
149
- fn=process_video,
150
- inputs=url_input,
151
- outputs=[transcription_output, category_output]
152
- )
153
 
154
- if __name__ == "__main__":
155
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
+ from fastapi import FastAPI, HTTPException
3
  import yt_dlp
4
  import whisper
5
+ import requests
6
  import os
 
7
  import re
8
+ import asyncio
9
+ from urllib.parse import urlparse
10
 
11
+ app = FastAPI()
 
 
 
 
 
12
 
13
# Resolve shortened TikTok links
def expand_tiktok_url(url):
    """Return the final URL for a (possibly shortened) TikTok link.

    URLs that already start with ``http(s)://[www.]tiktok.com`` are returned
    unchanged. Anything else is resolved with a HEAD request that follows
    redirects, and the final location is returned.

    This is best-effort: if the request fails, or ``url`` is empty or not a
    string, the input is returned as-is so the caller's validation can
    reject it.
    """
    # Nothing to resolve; avoid a pointless network round-trip.
    if not isinstance(url, str) or not url:
        return url

    if re.match(r'^https?://(www\.)?tiktok\.com', url):
        return url

    # NOTE(review): `url` is caller-supplied, so this issues a request to an
    # arbitrary host (SSRF surface). Consider restricting it to known TikTok
    # short-link hosts if third-party shorteners need not be supported.
    try:
        # The context manager releases the session's pooled connections.
        with requests.Session() as session:
            resp = session.head(url, allow_redirects=True, timeout=10)
            return resp.url
    except requests.RequestException:
        # Only network/URL errors are treated as "could not expand";
        # KeyboardInterrupt and programming errors are no longer swallowed.
        return url
23
+
24
# Extract the video id from a TikTok URL
def get_video_id(url):
    """Return the video id found in a TikTok video URL, or ``None``.

    The id is the path segment that follows ``video`` (for example
    ``/@user/video/<id>``). ``None`` is returned when:

    * ``url`` is not a string or cannot be parsed,
    * the host is not ``tiktok.com`` or one of its subdomains,
    * there is no ``video`` segment, or nothing follows it,
    * the segment after ``video`` is not made of ASCII digits only.

    Restricting the id to digits matters because callers use it to build
    file names and command lines.
    """
    if not isinstance(url, str):
        return None

    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 literal).
        return None

    # Compare the real host name rather than using a substring test, which
    # would also accept hosts such as "tiktok.com.evil.example".
    host = (parsed.hostname or '').lower()
    if host != 'tiktok.com' and not host.endswith('.tiktok.com'):
        return None

    segments = parsed.path.split('/')
    if 'video' not in segments:
        return None

    # A path ending in ".../video" has no following segment.
    position = segments.index('video') + 1
    if position >= len(segments):
        return None

    candidate = segments[position]
    return candidate if re.fullmatch(r'[0-9]+', candidate) else None
34
+
35
# Main processing pipeline
_WHISPER_MODEL = None  # loaded on first use and reused across requests


def _get_whisper_model():
    """Return the shared Whisper "base" model, loading it on first use."""
    global _WHISPER_MODEL
    if _WHISPER_MODEL is None:
        _WHISPER_MODEL = whisper.load_model("base")
    return _WHISPER_MODEL


def _download_and_transcribe(final_url):
    """Download the video, extract 16 kHz mono audio and return the transcript text."""
    import subprocess
    import tempfile

    # A private temp directory keeps concurrent requests from clobbering each
    # other's files and guarantees cleanup even when a step fails.
    with tempfile.TemporaryDirectory() as workdir:
        ydl_opts = {
            'format': 'best[ext=mp4]',
            'outtmpl': os.path.join(workdir, 'video.mp4'),
            'quiet': True,
            'extractor_args': {'tiktok': {'skip_watermark': True}},
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(final_url, download=True)
            video_path = ydl.prepare_filename(info)

        audio_path = os.path.join(workdir, 'audio.mp3')
        # Argument list (no shell) so no part of the path is interpreted by a
        # shell; check=True turns an ffmpeg failure into an exception instead
        # of transcribing a missing file. stdin is closed so ffmpeg can never
        # block waiting for interactive input.
        subprocess.run(
            ['ffmpeg', '-y', '-i', video_path, '-vn',
             '-ar', '16000', '-ac', '1', audio_path],
            check=True,
            stdin=subprocess.DEVNULL,
        )

        result = _get_whisper_model().transcribe(audio_path)
        return result["text"]


async def process_video(url):
    """Transcribe the speech of a TikTok video.

    Steps: expand a shortened link, validate it and extract the video id,
    download the video, extract its audio with ffmpeg, and transcribe the
    audio with Whisper.

    Args:
        url: TikTok video URL (full or shortened).

    Returns:
        A dict with keys ``"transcription"`` (the transcript text) and
        ``"video_id"``.

    Raises:
        HTTPException: status 400 when the URL is not a valid TikTok video
            URL; status 500 (with the underlying error message as detail)
            when any processing step fails.
    """
    try:
        loop = asyncio.get_running_loop()

        # 1. Expand shortened links. This is a blocking HTTP call, so it runs
        #    in the default executor to keep the event loop responsive.
        final_url = await loop.run_in_executor(None, expand_tiktok_url, url)

        # 2. Validate the link
        video_id = get_video_id(final_url)
        if not video_id:
            raise HTTPException(status_code=400, detail="Invalid TikTok URL")

        # 3-6. Download, extract audio, transcribe, clean up (all blocking).
        text = await loop.run_in_executor(None, _download_and_transcribe, final_url)

        return {
            "transcription": text,
            "video_id": video_id
        }

    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the 400 above) intact instead of
        # letting the generic handler below rewrite them as 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
 
77
 
78
# REST endpoint: GET /analyze?url=<TikTok link>
@app.get("/analyze")
async def analyze(url: str):
    # Delegate to the shared pipeline; HTTPException errors raised there
    # propagate to FastAPI unchanged.
    payload = await process_video(url)
    return payload
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
# Gradio interface (optional)
with gr.Blocks() as demo:
    gr.Markdown("## TikTok Analyzer")
    url_input = gr.Textbox(label="TikTok URL")
    output_text = gr.Textbox(label="Transcription")
    submit_btn = gr.Button("Analyze")

    async def analyze_gradio(url):
        """Gradio handler: return the transcription text for a TikTok URL."""
        # process_video is a coroutine function, so it must be awaited;
        # calling it bare yields a coroutine object, not a result dict.
        try:
            result = await process_video(url)
        except HTTPException as exc:
            # Show API-style errors as a readable message in the UI.
            raise gr.Error(str(exc.detail)) from exc
        return result["transcription"]

    # Wire the handler to the UI; without this it is never invoked.
    submit_btn.click(fn=analyze_gradio, inputs=url_input, outputs=output_text)
    url_input.submit(fn=analyze_gradio, inputs=url_input, outputs=output_text)

if __name__ == "__main__":
    # Run as a script: serve the Gradio UI directly.
    demo.launch(server_name="0.0.0.0", server_port=7860)
else:
    # Imported by an ASGI server (e.g. `uvicorn app:app`): mount the UI on the
    # FastAPI app so the UI and /analyze are served together. launch() must
    # not run here because it blocks and would stall the import.
    app = gr.mount_gradio_app(app, demo, path="/")