ayloll committed on
Commit
7480061
·
verified ·
1 Parent(s): 9e18881

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -164
app.py CHANGED
@@ -1,222 +1,170 @@
1
- import logging
2
  from fastapi import FastAPI, HTTPException
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
 
5
  import yt_dlp
6
  import whisper
7
  import os
8
- import subprocess
9
- from transformers import pipeline
10
  from typing import Optional
11
- import traceback
12
 
13
- # Configure logging
14
- logging.basicConfig(level=logging.INFO)
15
- logger = logging.getLogger(__name__)
16
 
17
- app = FastAPI(title="TikTok Content Analyzer API")
18
-
19
- # CORS Configuration
20
  app.add_middleware(
21
  CORSMiddleware,
22
  allow_origins=["*"],
 
23
  allow_methods=["*"],
24
  allow_headers=["*"],
25
  )
26
 
27
  class VideoRequest(BaseModel):
28
- url: str
29
 
30
  class AnalysisResult(BaseModel):
31
  transcription: str
32
- category: Optional[str] = None
33
- confidence: Optional[float] = None
34
- error: Optional[str] = None
35
 
 
36
  def clean_temp_files():
37
- """Remove temporary files if they exist"""
38
  temp_files = ["temp_video.mp4", "temp_audio.mp3"]
39
  for file in temp_files:
40
- try:
41
- if os.path.exists(file):
42
- os.remove(file)
43
- logger.info(f"Removed temporary file: {file}")
44
- except Exception as e:
45
- logger.warning(f"Could not remove {file}: {str(e)}")
46
 
47
- def download_video(video_url: str) -> Optional[str]:
48
- """Download TikTok video using yt-dlp"""
49
  try:
50
  ydl_opts = {
51
- 'format': 'best[ext=mp4]',
52
  'outtmpl': 'temp_video.mp4',
53
  'quiet': True,
54
  'no_warnings': True,
55
- 'extract_flat': True,
56
- 'socket_timeout': 30,
57
- 'retries': 3
58
  }
59
 
60
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
61
- info = ydl.extract_info(video_url, download=True)
62
- logger.info(f"Successfully downloaded video: {info.get('title', 'No title')}")
63
-
64
- return "temp_video.mp4" if os.path.exists("temp_video.mp4") else None
65
-
66
- except yt_dlp.utils.DownloadError as e:
67
- logger.error(f"YT-DLP Download Error: {str(e)}")
68
  except Exception as e:
69
- logger.error(f"Unexpected download error: {str(e)}")
70
- logger.error(traceback.format_exc())
71
-
72
- return None
73
 
74
- def extract_audio(video_path: str) -> Optional[str]:
75
- """Extract audio from video using ffmpeg"""
76
- try:
77
- result = subprocess.run([
78
- "ffmpeg",
79
- "-i", video_path,
80
- "-vn",
81
- "-acodec", "libmp3lame",
82
- "-q:a", "3",
83
- "temp_audio.mp3",
84
- "-y"
85
- ], capture_output=True, text=True, check=True)
86
-
87
- logger.info("FFmpeg output: " + result.stdout)
88
- if result.stderr:
89
- logger.warning("FFmpeg warnings: " + result.stderr)
90
-
91
- return "temp_audio.mp3" if os.path.exists("temp_audio.mp3") else None
92
-
93
- except subprocess.CalledProcessError as e:
94
- logger.error(f"FFmpeg error: {str(e)}")
95
- logger.error(f"FFmpeg stderr: {e.stderr}")
96
- except Exception as e:
97
- logger.error(f"Unexpected audio extraction error: {str(e)}")
98
-
99
- return None
100
 
101
- def transcribe_audio(audio_path: str) -> Optional[str]:
102
- """Transcribe audio using Whisper"""
103
  try:
104
- logger.info("Loading Whisper model...")
105
- model = whisper.load_model("tiny") # Using tiny model for Hugging Face Spaces
106
-
107
- logger.info("Starting transcription...")
108
  result = model.transcribe(audio_path)
109
-
110
- if 'text' in result:
111
- logger.info("Transcription successful")
112
- return result['text']
113
- else:
114
- logger.error("Whisper returned no text")
115
-
116
  except Exception as e:
117
- logger.error(f"Transcription error: {str(e)}")
118
- logger.error(traceback.format_exc())
119
-
120
- return None
121
 
122
- def classify_content(text: str) -> tuple[Optional[str], Optional[float]]:
123
- """Classify text content using transformers"""
124
- if not text or len(text.strip()) == 0:
125
- return None, None
126
-
127
  try:
128
- logger.info("Loading classifier...")
129
- classifier = pipeline(
130
- "zero-shot-classification",
131
- model="facebook/bart-large-mnli",
132
- device="cpu"
133
- )
134
 
135
- labels = [
136
- "educational", "entertainment", "news",
137
- "political", "religious", "technical",
138
- "advertisement", "social"
139
- ]
140
 
141
- logger.info("Starting classification...")
142
- result = classifier(
143
- text,
144
- candidate_labels=labels,
145
- hypothesis_template="This text is about {}."
146
- )
147
 
148
- if result and 'labels' in result and 'scores' in result:
149
- return result['labels'][0], result['scores'][0]
150
-
151
  except Exception as e:
152
- logger.error(f"Classification error: {str(e)}")
153
- logger.error(traceback.format_exc())
154
-
155
- return None, None
156
 
157
  @app.post("/analyze", response_model=AnalysisResult)
158
  async def analyze_video(request: VideoRequest):
159
- """Main endpoint for video analysis"""
160
  clean_temp_files()
161
 
162
- # Validate URL
163
- if not request.url or not any(domain in request.url for domain in ["tiktok.com", "vm.tiktok.com"]):
164
- error_msg = "Invalid TikTok URL. Please provide a valid TikTok video URL."
165
- logger.error(error_msg)
166
- return AnalysisResult(error=error_msg)
167
 
168
- try:
169
- # Step 1: Download video
170
- logger.info(f"Processing URL: {request.url}")
171
- video_path = download_video(request.url)
172
- if not video_path:
173
- error_msg = "Failed to download video. The URL may be invalid or private."
174
- logger.error(error_msg)
175
- return AnalysisResult(error=error_msg)
176
-
177
- # Step 2: Extract audio
178
- audio_path = extract_audio(video_path)
179
- if not audio_path:
180
- error_msg = "Failed to extract audio from video."
181
- logger.error(error_msg)
182
- clean_temp_files()
183
- return AnalysisResult(error=error_msg)
184
-
185
- # Step 3: Transcribe audio
186
- transcription = transcribe_audio(audio_path)
187
- if not transcription:
188
- error_msg = "Failed to transcribe audio content."
189
- logger.error(error_msg)
190
- clean_temp_files()
191
- return AnalysisResult(error=error_msg)
192
-
193
- # Step 4: Classify content
194
- category, confidence = classify_content(transcription)
195
-
196
- # Clean up
197
  clean_temp_files()
198
-
199
  return AnalysisResult(
200
- transcription=transcription,
201
- category=category,
202
- confidence=confidence
 
203
  )
204
 
205
- except Exception as e:
206
- error_msg = f"Internal server error: {str(e)}"
207
- logger.error(error_msg)
208
- logger.error(traceback.format_exc())
 
 
 
 
 
 
 
 
 
 
209
  clean_temp_files()
210
- return AnalysisResult(error=error_msg)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  @app.get("/")
213
- async def health_check():
214
- """Health check endpoint"""
215
- return {
216
- "status": "OK",
217
- "message": "TikTok Analyzer API is running",
218
- "endpoints": {
219
- "analyze": "POST /analyze",
220
- "health": "GET /"
221
- }
222
- }
 
 
1
  from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
+ from transformers import pipeline
5
  import yt_dlp
6
  import whisper
7
  import os
8
+ import uuid
9
+ import re
10
  from typing import Optional
 
11
 
12
+ app = FastAPI(title="TikTok Content Analyzer")
 
 
13
 
14
+ # Configure CORS
 
 
15
  app.add_middleware(
16
  CORSMiddleware,
17
  allow_origins=["*"],
18
+ allow_credentials=True,
19
  allow_methods=["*"],
20
  allow_headers=["*"],
21
  )
22
 
23
  class VideoRequest(BaseModel):
24
+ video_url: str
25
 
26
  class AnalysisResult(BaseModel):
27
  transcription: str
28
+ category: Optional[str]
29
+ confidence: Optional[float]
30
+ error: Optional[str]
31
 
32
+ # Delete temporary files
33
  def clean_temp_files():
 
34
  temp_files = ["temp_video.mp4", "temp_audio.mp3"]
35
  for file in temp_files:
36
+ if os.path.exists(file):
37
+ os.remove(file)
 
 
 
 
38
 
39
+ # Download TikTok video
40
+ def download_video(video_url):
41
  try:
42
  ydl_opts = {
43
+ 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
44
  'outtmpl': 'temp_video.mp4',
45
  'quiet': True,
46
  'no_warnings': True,
47
+ 'extractor_args': {'tiktok': {'skip_watermark': True}}
 
 
48
  }
49
 
50
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
51
+ ydl.download([video_url])
52
+ return "temp_video.mp4"
 
 
 
 
 
53
  except Exception as e:
54
+ print(f"Download error: {e}")
55
+ return None
 
 
56
 
57
+ # Extract audio
58
+ def extract_audio(video_path):
59
+ os.system(f"ffmpeg -i \"{video_path}\" -vn -acodec libmp3lame -q:a 3 \"temp_audio.mp3\" -y")
60
+ return "temp_audio.mp3" if os.path.exists("temp_audio.mp3") else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ # Transcribe audio
63
+ def transcribe_audio(audio_path):
64
  try:
65
+ model = whisper.load_model("base")
 
 
 
66
  result = model.transcribe(audio_path)
67
+ return result['text']
 
 
 
 
 
 
68
  except Exception as e:
69
+ print(f"Transcription error: {e}")
70
+ return None
 
 
71
 
72
+ # Classify content
73
+ def classify_content(text):
 
 
 
74
  try:
75
+ if not text or len(text.strip()) == 0:
76
+ return None, None
77
+
78
+ classifier = pipeline("zero-shot-classification",
79
+ model="facebook/bart-large-mnli")
 
80
 
81
+ labels = ["educational", "entertainment", "news", "political",
82
+ "religious", "technical", "advertisement", "social"]
 
 
 
83
 
84
+ result = classifier(text,
85
+ candidate_labels=labels,
86
+ hypothesis_template="This text is about {}.")
 
 
 
87
 
88
+ return result['labels'][0], result['scores'][0]
 
 
89
  except Exception as e:
90
+ print(f"Classification error: {e}")
91
+ return None, None
 
 
92
 
93
  @app.post("/analyze", response_model=AnalysisResult)
94
  async def analyze_video(request: VideoRequest):
 
95
  clean_temp_files()
96
 
97
+ video_url = request.video_url
 
 
 
 
98
 
99
+ if not video_url or len(video_url.strip()) == 0:
100
+ return AnalysisResult(
101
+ transcription="",
102
+ category=None,
103
+ confidence=None,
104
+ error="Please enter a valid TikTok URL"
105
+ )
106
+
107
+ if "tiktok.com" not in video_url and "vm.tiktok.com" not in video_url:
108
+ return AnalysisResult(
109
+ transcription="",
110
+ category=None,
111
+ confidence=None,
112
+ error="This app is for TikTok links only"
113
+ )
114
+
115
+ # Download video
116
+ video_path = download_video(video_url)
117
+ if not video_path:
118
+ return AnalysisResult(
119
+ transcription="",
120
+ category=None,
121
+ confidence=None,
122
+ error="Failed to download video"
123
+ )
124
+
125
+ # Extract audio
126
+ audio_path = extract_audio(video_path)
127
+ if not audio_path:
128
  clean_temp_files()
 
129
  return AnalysisResult(
130
+ transcription="",
131
+ category=None,
132
+ confidence=None,
133
+ error="Failed to extract audio"
134
  )
135
 
136
+ # Transcribe
137
+ transcription = transcribe_audio(audio_path)
138
+ if not transcription:
139
+ clean_temp_files()
140
+ return AnalysisResult(
141
+ transcription="",
142
+ category=None,
143
+ confidence=None,
144
+ error="Failed to transcribe audio"
145
+ )
146
+
147
+ # Classify
148
+ category, confidence = classify_content(transcription)
149
+ if not category:
150
  clean_temp_files()
151
+ return AnalysisResult(
152
+ transcription=transcription,
153
+ category=None,
154
+ confidence=None,
155
+ error="Failed to classify content"
156
+ )
157
+
158
+ # Clean up
159
+ clean_temp_files()
160
+
161
+ return AnalysisResult(
162
+ transcription=transcription,
163
+ category=category,
164
+ confidence=confidence,
165
+ error=None
166
+ )
167
 
168
  @app.get("/")
169
+ async def read_root():
170
+ return {"message": "TikTok Content Analyzer API is running"}