ayloll committed on
Commit
525e22b
·
verified ·
1 Parent(s): efd114f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -105
app.py CHANGED
@@ -1,114 +1,111 @@
1
- # First install required packages (this should be at the very top)
2
- #!pip install --upgrade gradio yt-dlp openai-whisper transformers ffmpeg-python pydub
3
-
4
  import gradio as gr
5
  from transformers import pipeline
6
  import yt_dlp
7
  import whisper
8
  import os
9
- import requests
10
  import uuid
11
  import re
12
 
13
- # Delete old files
14
- def clean_old_files():
15
- files = ["video.mp4", "audio.mp3", "transcription.txt"]
16
- for file in files:
17
  if os.path.exists(file):
18
  os.remove(file)
19
 
20
  # Download TikTok video
21
  def download_video(video_url):
22
- unique_name = f"video_{uuid.uuid4().hex[:8]}.mp4"
23
-
24
- ydl_opts = {
25
- 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
26
- 'outtmpl': unique_name,
27
- 'quiet': True,
28
- 'no_warnings': True,
29
- 'extractor_args': {
30
- 'tiktok': {
31
- 'skip_watermark': True
32
- }
33
- }
34
- }
35
-
36
  try:
 
 
 
 
 
 
 
 
37
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
38
  ydl.download([video_url])
39
- return unique_name
40
  except Exception as e:
41
- print(f"Error downloading video: {e}")
42
  return None
43
 
44
- # Extract audio from video
45
  def extract_audio(video_path):
46
- audio_path = "audio.mp3"
47
- os.system(f"ffmpeg -i \"{video_path}\" -vn -acodec libmp3lame -q:a 3 \"{audio_path}\" -y")
48
- if not os.path.exists(audio_path):
49
- raise RuntimeError("Error: Failed to extract audio.")
50
- return audio_path
51
 
52
- # Convert audio to text
53
  def transcribe_audio(audio_path):
54
  try:
55
  model = whisper.load_model("base")
56
  result = model.transcribe(audio_path)
57
  return result['text']
58
  except Exception as e:
59
- return f"Transcription error: {str(e)}"
 
60
 
61
  # Classify content
62
  def classify_content(text):
63
  try:
64
  if not text or len(text.strip()) == 0:
65
- return "No text to classify"
66
 
67
  classifier = pipeline("zero-shot-classification",
68
  model="facebook/bart-large-mnli")
69
 
70
- labels = ["Challenge", "Comedy", "Dance", "Educational", "TikTok Trend",
71
- "Music", "Lifestyle", "Beauty", "Cooking", "Fashion"]
72
 
73
- clean_text = ' '.join(text.split()[:500])
74
- result = classifier(clean_text,
75
  candidate_labels=labels,
76
- hypothesis_template="This content is about {}.")
77
 
78
- return f"{result['labels'][0]} (Confidence: {result['scores'][0]:.2f})"
79
  except Exception as e:
80
- return f"Classification error: {str(e)}"
 
81
 
82
- # Main video processing function
83
  def process_video(video_url):
84
- try:
85
- clean_old_files()
86
-
87
- if not video_url or len(video_url.strip()) == 0:
88
- return ["Please enter a valid video URL", "", None, None]
89
-
90
- if "tiktok.com" not in video_url and "vm.tiktok.com" not in video_url:
91
- return ["This app is for TikTok links only", "", None, None]
92
-
93
- print(f"Downloading video: {video_url}")
94
- video_path = download_video(video_url)
95
-
96
- if not video_path:
97
- return ["Failed to download video. Please check the URL.", "", None, None]
98
-
99
- print("Extracting audio...")
100
- audio_path = extract_audio(video_path)
101
-
102
- print("Transcribing audio...")
103
- transcription = transcribe_audio(audio_path)
104
-
105
- print("Classifying content...")
106
- category = classify_content(transcription)
107
-
108
- return [transcription, category, video_path, audio_path]
109
-
110
- except Exception as e:
111
- return [f"Processing error: {str(e)}", "", None, None]
 
 
 
 
 
 
 
 
 
112
 
113
  # Gradio interface
114
  with gr.Blocks(title="TikTok Content Analyzer") as demo:
@@ -120,56 +117,39 @@ with gr.Blocks(title="TikTok Content Analyzer") as demo:
120
  with gr.Row():
121
  url_input = gr.Textbox(
122
  label="TikTok URL",
123
- placeholder="Enter TikTok video URL here...",
124
- scale=4
125
  )
126
- submit_btn = gr.Button("Analyze Video", variant="primary", scale=1)
127
 
128
  with gr.Row():
129
- with gr.Column():
130
- transcription_output = gr.Textbox(
131
- label="Extracted Text",
132
- interactive=True,
133
- lines=10,
134
- max_lines=20
135
- )
136
- category_output = gr.Textbox(
137
- label="Content Category",
138
- interactive=False
139
- )
140
-
141
- with gr.Column():
142
- video_preview = gr.Video(
143
- label="Downloaded Video",
144
- interactive=False
145
- )
146
- audio_preview = gr.Audio(
147
- label="Extracted Audio",
148
- interactive=False
149
- )
150
 
151
- # TikTok URL examples
 
 
152
  gr.Examples(
153
  examples=[
154
  ["https://www.tiktok.com/@example/video/123456789"],
155
- ["https://www.tiktok.com/@user2/video/987654321"],
156
  ["https://vm.tiktok.com/ZMexample/"]
157
  ],
158
- inputs=url_input,
159
- label="Try these examples"
160
  )
161
 
162
- # Button click event
163
  submit_btn.click(
164
  fn=process_video,
165
  inputs=url_input,
166
- outputs=[transcription_output, category_output, video_preview, audio_preview]
167
  )
168
 
169
- # Launch the app
170
  if __name__ == "__main__":
171
- demo.launch(
172
- server_name="0.0.0.0",
173
- server_port=7860,
174
- share=False
175
- )
 
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import yt_dlp
4
  import whisper
5
  import os
 
6
  import uuid
7
  import re
8
 
9
# Delete temporary files
def clean_temp_files():
    """Remove the temporary video and audio files from the working directory.

    The files are ``temp_video.mp4`` and ``temp_audio.mp3``. A file that does
    not exist is silently skipped, so the function is safe to call repeatedly.
    """
    temp_files = ["temp_video.mp4", "temp_audio.mp3"]
    for path in temp_files:
        # Attempt the removal directly instead of checking existence first:
        # the file may disappear between the check and the removal.
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
15
 
16
  # Download TikTok video
17
def download_video(video_url):
    """Download the video at ``video_url`` with yt-dlp into ``temp_video.mp4``.

    Args:
        video_url: URL passed unchanged to ``YoutubeDL.download``.

    Returns:
        The string ``"temp_video.mp4"`` when the download finished and the
        file exists on disk, otherwise ``None``. Errors are printed, not
        raised.
    """
    output_path = "temp_video.mp4"
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
        'outtmpl': output_path,
        'quiet': True,
        'no_warnings': True,
        # NOTE(review): presumably asks the TikTok extractor for a
        # watermark-free stream -- confirm the extractor honours this key.
        'extractor_args': {'tiktok': {'skip_watermark': True}}
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video_url])
    except Exception as e:
        print(f"Download error: {e}")
        return None

    # A call that returns without raising does not guarantee the file was
    # written; verify before handing the path to the audio extraction step.
    if not os.path.exists(output_path):
        print(f"Download error: {output_path} was not created")
        return None
    return output_path
33
 
34
# Extract audio (temporary)
def extract_audio(video_path):
    """Extract the audio track of ``video_path`` into ``temp_audio.mp3``.

    Runs ``ffmpeg`` with the libmp3lame encoder at quality level 3,
    overwriting any existing output file.

    Args:
        video_path: Path of the input video file.

    Returns:
        ``"temp_audio.mp3"`` when ffmpeg exits successfully and the file
        exists, otherwise ``None`` (including when ffmpeg is not installed).
    """
    # Imported locally because the module header does not import subprocess.
    import subprocess

    audio_path = "temp_audio.mp3"
    # An argument list (no shell) keeps quotes or shell metacharacters in the
    # path from being interpreted.
    command = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vn",
        "-acodec", "libmp3lame",
        "-q:a", "3",
        audio_path,
    ]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as e:
        # Raised when the ffmpeg executable cannot be started at all.
        print(f"Audio extraction error: {e}")
        return None

    # Check the exit status as well as the file: a stale output from an
    # earlier run must not be mistaken for success.
    if completed.returncode != 0 or not os.path.exists(audio_path):
        return None
    return audio_path
 
 
 
38
 
39
# Transcribe audio
# Whisper model, loaded on first use and reused by later calls so the
# weights are not reloaded for every request.
_WHISPER_MODEL = None


def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` with Whisper's "base" model.

    Args:
        audio_path: Path of the audio file passed to ``model.transcribe``.

    Returns:
        The ``'text'`` entry of the transcription result, or ``None`` if
        loading the model or transcribing raised. The error is printed.
    """
    global _WHISPER_MODEL
    try:
        if _WHISPER_MODEL is None:
            _WHISPER_MODEL = whisper.load_model("base")
        result = _WHISPER_MODEL.transcribe(audio_path)
        return result['text']
    except Exception as e:
        print(f"Transcription error: {e}")
        return None
48
 
49
  # Classify content
50
# Zero-shot classification pipeline, built on first use and reused by later
# calls so the model is not re-instantiated for every request.
_CLASSIFIER = None


def classify_content(text):
    """Classify ``text`` into one of a fixed set of content categories.

    Uses a zero-shot classification pipeline backed by
    ``facebook/bart-large-mnli``.

    Args:
        text: The text to classify (the transcription, in this app).

    Returns:
        A ``(label, score)`` tuple holding the first label and first score
        of the pipeline result, or ``(None, None)`` when ``text`` is empty or
        whitespace-only, or when classification raised. The error is printed.
    """
    global _CLASSIFIER
    try:
        if not text or not text.strip():
            return None, None

        if _CLASSIFIER is None:
            _CLASSIFIER = pipeline("zero-shot-classification",
                                   model="facebook/bart-large-mnli")

        labels = ["educational", "entertainment", "news", "political",
                  "religious", "technical", "advertisement", "social"]

        result = _CLASSIFIER(text,
                             candidate_labels=labels,
                             hypothesis_template="This text is about {}.")

        return result['labels'][0], result['scores'][0]
    except Exception as e:
        print(f"Classification error: {e}")
        return None, None
69
 
70
# Main processing function
def process_video(video_url):
    """Download, transcribe and classify a TikTok video.

    Pipeline: validate the URL, download the video, extract its audio,
    transcribe the audio, then classify the transcription. Temporary files
    are removed before the pipeline starts and again when it ends, whether
    it succeeds, stops early or raises.

    Args:
        video_url: URL entered by the user.

    Returns:
        A ``(transcription, category)`` tuple of strings for the two output
        textboxes. When a step fails, the first element holds the error
        message and the second is empty. The exception is a classification
        failure, where the transcription is still returned alongside
        ``"Failed to classify content"``.
    """
    if not video_url or not video_url.strip():
        return "Please enter a valid TikTok URL", ""

    # "vm.tiktok.com" contains "tiktok.com", so one substring test covers
    # both full and short links.
    if "tiktok.com" not in video_url:
        return "This app is for TikTok links only", ""

    # Start from a clean slate so stale files are never mistaken for output.
    clean_temp_files()
    try:
        video_path = download_video(video_url)
        if not video_path:
            return "Failed to download video", ""

        audio_path = extract_audio(video_path)
        if not audio_path:
            return "Failed to extract audio", ""

        transcription = transcribe_audio(audio_path)
        if not transcription:
            return "Failed to transcribe audio", ""

        category, confidence = classify_content(transcription)
        if not category:
            return transcription, "Failed to classify content"

        return transcription, f"{category} (confidence: {confidence:.2f})"
    finally:
        # Single cleanup point for every exit path, including exceptions.
        clean_temp_files()
109
 
110
  # Gradio interface
111
  with gr.Blocks(title="TikTok Content Analyzer") as demo:
 
117
  with gr.Row():
118
  url_input = gr.Textbox(
119
  label="TikTok URL",
120
+ placeholder="Enter TikTok video URL here..."
 
121
  )
 
122
 
123
  with gr.Row():
124
+ transcription_output = gr.Textbox(
125
+ label="Transcription",
126
+ interactive=True,
127
+ lines=10,
128
+ max_lines=20
129
+ )
130
+
131
+ with gr.Row():
132
+ category_output = gr.Textbox(
133
+ label="Content Category",
134
+ interactive=False
135
+ )
 
 
 
 
 
 
 
 
 
136
 
137
+ submit_btn = gr.Button("Analyze Video", variant="primary")
138
+
139
+ # Examples
140
  gr.Examples(
141
  examples=[
142
  ["https://www.tiktok.com/@example/video/123456789"],
 
143
  ["https://vm.tiktok.com/ZMexample/"]
144
  ],
145
+ inputs=url_input
 
146
  )
147
 
 
148
  submit_btn.click(
149
  fn=process_video,
150
  inputs=url_input,
151
+ outputs=[transcription_output, category_output]
152
  )
153
 
 
154
  if __name__ == "__main__":
155
+ demo.launch(server_name="0.0.0.0", server_port=7860)