ayloll committed on
Commit
1d234a6
·
verified ·
1 Parent(s): 230bcec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -105
app.py CHANGED
@@ -3,130 +3,121 @@ from transformers import pipeline
3
  import yt_dlp
4
  import whisper
5
  import os
 
6
  import re
7
- import tempfile
8
- import traceback
9
 
10
- # تهيئة النموذج مسبقاً لتحسين الأداء
11
- whisper_model = whisper.load_model("base")
12
- classifier = pipeline("zero-shot-classification",
13
- model="facebook/bart-large-mnli")
 
 
14
 
15
- # تصنيفات المحتوى
16
- CONTENT_LABELS = [
17
- "educational", "entertainment", "news", "political",
18
- "religious", "technical", "advertisement", "social"
19
- ]
20
-
21
- def process_video(video_url):
22
- """الدالة الرئيسية لمعالجة الفيديو"""
23
- try:
24
- # التحقق من صحة الرابط
25
- if not is_valid_tiktok_url(video_url):
26
- return "Invalid TikTok URL", ""
27
-
28
- # إنشاء ملفات مؤقتة
29
- with tempfile.NamedTemporaryFile(suffix='.mp4') as video_file, \
30
- tempfile.NamedTemporaryFile(suffix='.mp3') as audio_file:
31
-
32
- # تنزيل الفيديو
33
- if not download_video(video_url, video_file.name):
34
- return "Failed to download video", ""
35
-
36
- # استخراج الصوت
37
- if not extract_audio(video_file.name, audio_file.name):
38
- return "Failed to extract audio", ""
39
-
40
- # تحويل الصوت إلى نص
41
- transcription = transcribe_audio(audio_file.name)
42
- if not transcription:
43
- return "Failed to transcribe audio", ""
44
-
45
- # تصنيف المحتوى
46
- category, confidence = classify_content(transcription)
47
-
48
- # إرجاع النتائج
49
- if category:
50
- return transcription, f"{category} (confidence: {confidence:.2f})"
51
- return transcription, "Classification failed"
52
-
53
- except Exception as e:
54
- print(f"Error: {str(e)}\n{traceback.format_exc()}")
55
- return f"Processing error: {str(e)}", ""
56
-
57
- def is_valid_tiktok_url(url):
58
- """التحقق من صحة رابط تيك توك"""
59
- return bool(re.match(
60
- r'^https?://(www\.|vm\.)?tiktok\.com/.+',
61
- url,
62
- re.IGNORECASE
63
- ))
64
-
65
- def download_video(url, output_path):
66
- """تنزيل فيديو تيك توك"""
67
  try:
68
  ydl_opts = {
69
- 'format': 'best[ext=mp4]',
70
- 'outtmpl': output_path,
71
  'quiet': True,
72
  'no_warnings': True,
73
- 'extractor_args': {'tiktok': {'skip_watermark': True}},
74
- 'socket_timeout': 10,
75
- 'retries': 3
76
  }
77
 
78
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
79
- ydl.download([url])
80
- return os.path.exists(output_path)
81
- except:
82
- return False
 
83
 
84
- def extract_audio(video_path, audio_path):
85
- """استخراج الصوت من الفيديو"""
86
- try:
87
- os.system(
88
- f"ffmpeg -i \"{video_path}\" "
89
- f"-vn -acodec libmp3lame -q:a 2 \"{audio_path}\" -y"
90
- )
91
- return os.path.exists(audio_path)
92
- except:
93
- return False
94
 
 
95
  def transcribe_audio(audio_path):
96
- """تحويل الصوت إلى نص"""
97
  try:
98
- result = whisper_model.transcribe(audio_path)
99
- return result.get('text', '')
100
- except:
101
- return ""
 
 
102
 
 
103
  def classify_content(text):
104
- """تصنيف المحتوى"""
105
  try:
106
- if not text.strip():
107
  return None, None
108
 
109
- result = classifier(
110
- text,
111
- candidate_labels=CONTENT_LABELS,
112
- hypothesis_template="This text is about {}."
113
- )
 
 
 
 
 
114
  return result['labels'][0], result['scores'][0]
115
- except:
 
116
  return None, None
117
 
118
- # واجهة Gradio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  with gr.Blocks(title="TikTok Content Analyzer") as demo:
120
  gr.Markdown("""
121
  # 🎬 TikTok Content Analyzer
122
- Analyze any TikTok video to get transcription and content classification
123
  """)
124
 
125
  with gr.Row():
126
  url_input = gr.Textbox(
127
- label="TikTok Video URL",
128
- placeholder="Paste TikTok link here...",
129
- max_lines=1
130
  )
131
 
132
  with gr.Row():
@@ -143,16 +134,15 @@ with gr.Blocks(title="TikTok Content Analyzer") as demo:
143
  interactive=False
144
  )
145
 
146
- submit_btn = gr.Button("Analyze", variant="primary")
147
 
148
- # أمثلة
149
  gr.Examples(
150
  examples=[
151
  ["https://www.tiktok.com/@example/video/123456789"],
152
  ["https://vm.tiktok.com/ZMexample/"]
153
  ],
154
- inputs=url_input,
155
- label="Example TikTok URLs"
156
  )
157
 
158
  submit_btn.click(
@@ -161,10 +151,5 @@ with gr.Blocks(title="TikTok Content Analyzer") as demo:
161
  outputs=[transcription_output, category_output]
162
  )
163
 
164
- # تشغيل التطبيق
165
  if __name__ == "__main__":
166
- demo.launch(
167
- server_name="0.0.0.0",
168
- server_port=7860,
169
- show_error=True
170
- )
 
3
  import yt_dlp
4
  import whisper
5
  import os
6
+ import uuid
7
  import re
 
 
8
 
9
+ # Delete temporary files
10
def clean_temp_files(temp_files=("temp_video.mp4", "temp_audio.mp3")):
    """Remove the scratch media files left behind by a processing run.

    Args:
        temp_files: Iterable of paths to delete. Defaults to the fixed
            video/audio scratch file names used by the pipeline.

    A path that does not exist is skipped silently. Any other failure to
    delete (e.g. permissions, path is a directory) is printed and the
    remaining paths are still processed, so cleanup never aborts a request.
    """
    for path in temp_files:
        try:
            # Try the removal directly instead of checking os.path.exists
            # first: the file can disappear between the check and the
            # removal when two requests overlap.
            os.remove(path)
        except FileNotFoundError:
            continue
        except OSError as e:
            print(f"Cleanup error for {path}: {e}")
15
 
16
+ # Download TikTok video
17
def download_video(video_url, output_path="temp_video.mp4"):
    """Download a video with yt-dlp and report where it was saved.

    Args:
        video_url: URL of the video to fetch.
        output_path: File name handed to yt-dlp as its output template.
            Defaults to the fixed scratch name the rest of the pipeline uses.

    Returns:
        ``output_path`` if the file exists once yt-dlp has finished,
        otherwise ``None``. Download errors are printed, not raised.
    """
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
        'outtmpl': output_path,
        'quiet': True,
        'no_warnings': True,
        'extractor_args': {'tiktok': {'skip_watermark': True}},
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
    except Exception as e:
        print(f"Download error: {e}")
        return None

    # A clean return from yt-dlp is not taken as proof of success: confirm
    # the file is really where the audio-extraction step will look for it.
    if not os.path.exists(output_path):
        print(f"Download error: {output_path} was not created")
        return None
    return output_path
33
 
34
+ # Extract audio (temporary)
35
def extract_audio(video_path, audio_path="temp_audio.mp3"):
    """Extract the audio track of a video into an MP3 file using ffmpeg.

    Args:
        video_path: Path of the input video.
        audio_path: Path of the MP3 to write (overwritten if present).
            Defaults to the fixed scratch name used by the pipeline.

    Returns:
        ``audio_path`` when ffmpeg exits successfully and the file exists,
        otherwise ``None`` (including when ffmpeg cannot be started).
    """
    # Local import so this function does not depend on a module-level import.
    import subprocess

    if not video_path:
        return None

    # An argument list (no shell) means quotes or shell metacharacters in
    # the path cannot be interpreted as extra commands.
    command = [
        "ffmpeg", "-i", str(video_path),
        "-vn", "-acodec", "libmp3lame", "-q:a", "3",
        str(audio_path), "-y",
    ]
    try:
        completed = subprocess.run(
            command, stdin=subprocess.DEVNULL, check=False
        )
    except OSError as e:
        # Raised when the ffmpeg executable is missing or not runnable.
        print(f"Audio extraction error: {e}")
        return None

    # A non-zero exit status can still leave a partial output file behind,
    # so the exit status is checked in addition to the file's existence.
    if completed.returncode != 0:
        return None
    return audio_path if os.path.exists(audio_path) else None
 
 
 
 
 
 
38
 
39
+ # Transcribe audio
40
# Loaded Whisper models keyed by model name, filled lazily on first use.
_WHISPER_MODELS = {}


def _get_whisper_model(name="base"):
    """Return the Whisper model called *name*, loading it only once."""
    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def transcribe_audio(audio_path):
    """Transcribe an audio file with the Whisper "base" model.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The value stored under ``'text'`` in Whisper's result, or ``None``
        if loading the model or transcribing fails (the error is printed).
    """
    try:
        # Reuse the cached model: loading it again for every request adds
        # avoidable latency to each call.
        result = _get_whisper_model().transcribe(audio_path)
        return result['text']
    except Exception as e:
        print(f"Transcription error: {e}")
        return None
48
 
49
+ # Classify content
50
# Candidate categories offered to the zero-shot classifier by default.
_CONTENT_LABELS = (
    "educational", "entertainment", "news", "political",
    "religious", "technical", "advertisement", "social",
)

# Holds the zero-shot pipeline once it has been built (at most one entry).
_CLASSIFIER_CACHE = []


def _get_classifier():
    """Return the zero-shot classification pipeline, building it only once."""
    if not _CLASSIFIER_CACHE:
        _CLASSIFIER_CACHE.append(
            pipeline("zero-shot-classification",
                     model="facebook/bart-large-mnli")
        )
    return _CLASSIFIER_CACHE[0]


def classify_content(text, labels=None):
    """Assign the most likely content category to a piece of text.

    Args:
        text: Text to classify (here, a video transcription).
        labels: Optional candidate labels; defaults to the built-in
            content categories.

    Returns:
        ``(label, score)`` taken from the first entries of the classifier's
        ``'labels'`` and ``'scores'`` results, or ``(None, None)`` when the
        text is empty/blank or classification fails (the error is printed).
    """
    try:
        if not text or not text.strip():
            return None, None

        # Reuse the cached pipeline rather than constructing a new one for
        # every request.
        result = _get_classifier()(
            text,
            candidate_labels=list(labels or _CONTENT_LABELS),
            hypothesis_template="This text is about {}.",
        )
        return result['labels'][0], result['scores'][0]
    except Exception as e:
        print(f"Classification error: {e}")
        return None, None
69
 
70
+ # Main processing function
71
+ def process_video(video_url):
72
+ clean_temp_files()
73
+
74
+ if not video_url or len(video_url.strip()) == 0:
75
+ return "Please enter a valid TikTok URL", ""
76
+
77
+ if "tiktok.com" not in video_url and "vm.tiktok.com" not in video_url:
78
+ return "This app is for TikTok links only", ""
79
+
80
+ # Download video
81
+ video_path = download_video(video_url)
82
+ if not video_path:
83
+ return "Failed to download video", ""
84
+
85
+ # Extract audio
86
+ audio_path = extract_audio(video_path)
87
+ if not audio_path:
88
+ clean_temp_files()
89
+ return "Failed to extract audio", ""
90
+
91
+ # Transcribe
92
+ transcription = transcribe_audio(audio_path)
93
+ if not transcription:
94
+ clean_temp_files()
95
+ return "Failed to transcribe audio", ""
96
+
97
+ # Classify
98
+ category, confidence = classify_content(transcription)
99
+ if not category:
100
+ clean_temp_files()
101
+ return transcription, "Failed to classify content"
102
+
103
+ # Clean up
104
+ clean_temp_files()
105
+
106
+ # Format classification result
107
+ classification_result = f"{category} (confidence: {confidence:.2f})"
108
+ return transcription, classification_result
109
+
110
+ # Gradio interface
111
  with gr.Blocks(title="TikTok Content Analyzer") as demo:
112
  gr.Markdown("""
113
  # 🎬 TikTok Content Analyzer
114
+ Enter a TikTok video URL to get transcription and content classification
115
  """)
116
 
117
  with gr.Row():
118
  url_input = gr.Textbox(
119
+ label="TikTok URL",
120
+ placeholder="Enter TikTok video URL here..."
 
121
  )
122
 
123
  with gr.Row():
 
134
  interactive=False
135
  )
136
 
137
+ submit_btn = gr.Button("Analyze Video", variant="primary")
138
 
139
+ # Examples
140
  gr.Examples(
141
  examples=[
142
  ["https://www.tiktok.com/@example/video/123456789"],
143
  ["https://vm.tiktok.com/ZMexample/"]
144
  ],
145
+ inputs=url_input
 
146
  )
147
 
148
  submit_btn.click(
 
151
  outputs=[transcription_output, category_output]
152
  )
153
 
 
154
  if __name__ == "__main__":
155
+ demo.launch(server_name="0.0.0.0", server_port=7860)