ayloll committed on
Commit
e48ee91
·
verified ·
1 Parent(s): 383c593

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -132
app.py CHANGED
@@ -1,7 +1,4 @@
1
- !apt update && apt install ffmpeg
2
- !pip install yt-dlp openai-whisper pydub ffmpeg
3
- !pip install -U openai-whisper
4
-
5
  from transformers import pipeline
6
  import yt_dlp
7
  import whisper
@@ -10,140 +7,76 @@ import requests
10
  import uuid
11
  import re
12
 
13
- # Delete old files before starting a new process
14
- def clean_old_files():
15
- if os.path.exists("video.mp4"): os.remove("video.mp4")
16
- if os.path.exists("audio.mp3"): os.remove("audio.mp3")
17
- if os.path.exists("transcription.txt"): os.remove("transcription.txt")
18
 
19
- # Download video from YouTube or TikTok
20
- def download_video(video_url):
21
- unique_name = f"video_{uuid.uuid4().hex[:8]}.mp4"
22
-
23
- # Check if it's a TikTok URL
24
- if "tiktok.com" in video_url:
25
- ydl_opts = {
26
- 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
27
- 'outtmpl': unique_name,
28
- 'quiet': True,
29
- 'no_warnings': True,
30
- }
31
- else: # YouTube or other platforms
32
- ydl_opts = {
33
- 'format': 'mp4',
34
- 'outtmpl': unique_name,
35
- }
36
-
37
  try:
38
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
39
- ydl.download([video_url])
40
- return unique_name
41
- except Exception as e:
42
- print(f"Error downloading video: {e}")
43
- return None
44
-
45
- # Download video from a direct link
46
- def download_direct_video(video_url):
47
- unique_name = f"video_{uuid.uuid4().hex[:8]}.mp4"
48
- headers = {
49
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
50
- }
51
- try:
52
- response = requests.get(video_url, headers=headers, stream=True)
53
- response.raise_for_status()
54
- with open(unique_name, "wb") as f:
55
- for chunk in response.iter_content(chunk_size=1024):
56
- if chunk:
57
- f.write(chunk)
58
- return unique_name
 
 
 
 
 
 
 
 
 
 
 
59
  except Exception as e:
60
- print(f"Error downloading direct video: {e}")
61
- return None
62
-
63
- # Extract audio from the video using ffmpeg
64
- def extract_audio(video_path):
65
- audio_path = "audio.mp3"
66
- os.system(f"ffmpeg -i \"{video_path}\" -vn -acodec libmp3lame -q:a 3 \"{audio_path}\" -y")
67
- if not os.path.exists(audio_path):
68
- raise RuntimeError("Error: Failed to extract audio.")
69
- return audio_path
70
-
71
- # Convert audio to text using Whisper model
72
- def transcribe_audio(audio_path):
73
- model = whisper.load_model("large")
74
- result = model.transcribe(audio_path)
75
- return result['text']
76
 
77
- # Classify text content using Zero-shot Classification
78
- def classify_content(text):
79
- classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
80
- labels = ["educational", "entertainment", "news", "political", "religious",
81
- "technical", "advertisement", "social", "music", "comedy", "dance",
82
- "challenge", "tutorial", "vlog", "prank", "beauty", "fashion"]
83
 
84
- # Clean text to improve classification
85
- clean_text = ' '.join(text.split()[:500]) # Use first 500 words to avoid token limits
 
86
 
87
- result = classifier(clean_text, candidate_labels=labels, hypothesis_template="This text is about {}.")
 
 
 
 
 
 
 
88
 
89
- top_label = result['labels'][0]
90
- confidence = result['scores'][0]
91
-
92
- print("\nVideo Content Classification:")
93
- print(f"Predicted Category: {top_label} with confidence: {confidence:.2f}")
94
-
95
- return top_label
96
-
97
- # Check if URL is TikTok
98
- def is_tiktok_url(url):
99
- tiktok_pattern = r'(https?://)?(www\.)?tiktok\.com/.+'
100
- return re.match(tiktok_pattern, url) is not None
101
 
102
- # Main script
103
  if __name__ == "__main__":
104
- video_url = input("Enter the video URL or path to .mp4 file: ").strip()
105
-
106
- clean_old_files()
107
-
108
- if video_url.endswith(".mp4"):
109
- video_path = video_url
110
- elif is_tiktok_url(video_url):
111
- print("Downloading video from TikTok...")
112
- video_path = download_video(video_url)
113
- elif "youtube.com" in video_url or "youtu.be" in video_url:
114
- print("Downloading video from YouTube...")
115
- video_path = download_video(video_url)
116
- else:
117
- print("Downloading video from direct link...")
118
- video_path = download_direct_video(video_url)
119
-
120
- if not video_path or not os.path.exists(video_path):
121
- print("Error: Failed to download video.")
122
- exit(1)
123
-
124
- print("Extracting audio...\n")
125
- try:
126
- audio_path = extract_audio(video_path)
127
- except Exception as e:
128
- print(f"Error extracting audio: {e}")
129
- exit(1)
130
-
131
- print("Transcribing...\n")
132
- try:
133
- transcription = transcribe_audio(audio_path)
134
- except Exception as e:
135
- print(f"Error transcribing audio: {e}")
136
- exit(1)
137
-
138
- print("\nTranscription Result:\n")
139
- print(transcription)
140
-
141
- with open("transcription.txt", "w", encoding="utf-8") as f:
142
- f.write(transcription)
143
-
144
- # Classify the content based on transcribed text
145
- print("\nClassifying content...")
146
- try:
147
- classify_content(transcription)
148
- except Exception as e:
149
- print(f"Error classifying content: {e}")
 
1
+ import gradio as gr
 
 
 
2
  from transformers import pipeline
3
  import yt_dlp
4
  import whisper
 
7
  import uuid
8
  import re
9
 
10
+ # [Keep all your existing functions here: clean_old_files, download_video,
11
+ # download_direct_video, extract_audio, transcribe_audio, classify_content, is_tiktok_url]
 
 
 
12
 
13
+ def process_video(video_url):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  try:
15
+ clean_old_files()
16
+
17
+ if video_url.endswith(".mp4"):
18
+ video_path = video_url
19
+ elif is_tiktok_url(video_url):
20
+ print("Downloading video from TikTok...")
21
+ video_path = download_video(video_url)
22
+ elif "youtube.com" in video_url or "youtu.be" in video_url:
23
+ print("Downloading video from YouTube...")
24
+ video_path = download_video(video_url)
25
+ else:
26
+ print("Downloading video from direct link...")
27
+ video_path = download_direct_video(video_url)
28
+
29
+ if not video_path or not os.path.exists(video_path):
30
+ return "Error: Failed to download video."
31
+
32
+ print("Extracting audio...")
33
+ audio_path = extract_audio(video_path)
34
+
35
+ print("Transcribing...")
36
+ transcription = transcribe_audio(audio_path)
37
+
38
+ print("Classifying content...")
39
+ category = classify_content(transcription)
40
+
41
+ return {
42
+ "transcription": transcription,
43
+ "category": category,
44
+ "video_path": video_path,
45
+ "audio_path": audio_path
46
+ }
47
  except Exception as e:
48
+ return f"Error processing video: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ # Gradio Interface
51
+ with gr.Blocks(title="Video Content Analyzer") as demo:
52
+ gr.Markdown("""
53
+ # 🎥 Video Content Analyzer
54
+ Analyze videos from TikTok, YouTube, or direct links. Get transcription and content classification.
55
+ """)
56
 
57
+ with gr.Row():
58
+ url_input = gr.Textbox(label="Video URL", placeholder="Enter TikTok, YouTube or direct video URL...")
59
+ submit_btn = gr.Button("Analyze", variant="primary")
60
 
61
+ with gr.Row():
62
+ with gr.Column():
63
+ transcription_output = gr.Textbox(label="Transcription", interactive=False)
64
+ category_output = gr.Textbox(label="Content Category", interactive=False)
65
+
66
+ with gr.Column():
67
+ video_preview = gr.Video(label="Downloaded Video")
68
+ audio_preview = gr.Audio(label="Extracted Audio")
69
 
70
+ submit_btn.click(
71
+ fn=process_video,
72
+ inputs=url_input,
73
+ outputs={
74
+ "transcription": transcription_output,
75
+ "category": category_output,
76
+ "video_path": video_preview,
77
+ "audio_path": audio_preview
78
+ }
79
+ )
 
 
80
 
 
81
  if __name__ == "__main__":
82
+ demo.launch(server_name="0.0.0.0", server_port=7860)