Yashwanth committed on
Commit
ae67512
·
1 Parent(s): 4577488

Fix: Use correct model deepseek-ai/deepseek-v3.2

Browse files
backend/Dockerfile.hf ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Docker Space Dockerfile
2
+ FROM python:3.10-slim
3
+
4
+ # Install system dependencies
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ ffmpeg \
7
+ aria2 \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # Create user with ID 1000 (required by HF)
11
+ RUN useradd -m -u 1000 user
12
+
13
+ # Set working directory
14
+ WORKDIR /app
15
+
16
+ # Create downloads directory with proper permissions
17
+ RUN mkdir -p /app/downloads && chown -R 1000:1000 /app/downloads
18
+
19
+ # Copy requirements first for caching
20
+ COPY --chown=1000:1000 requirements.txt .
21
+
22
+ # Install Python dependencies
23
+ RUN pip install --no-cache-dir -r requirements.txt
24
+
25
+ # Copy application code
26
+ COPY --chown=1000:1000 main.py .
27
+
28
+ # Switch to non-root user (required by HF)
29
+ USER 1000
30
+
31
+ # Expose port 7860 (required by HF)
32
+ EXPOSE 7860
33
+
34
+ # Environment variables
35
+ ENV DOWNLOAD_DIR=/app/downloads
36
+ ENV PYTHONUNBUFFERED=1
37
+
38
+ # Run the app
39
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
backend/api/index.py CHANGED
@@ -357,7 +357,7 @@ class ViralClipExtractor:
357
 
358
  logger.info(f"Processing chunk {chunk_index} with Nvidia AI...")
359
  completion = client.chat.completions.create(
360
- model="deepseek-ai/deepseek-v3.1-terminus",
361
  messages=[{"role":"user","content":prompt}],
362
  temperature=0.2,
363
  max_tokens=1024,
 
357
 
358
  logger.info(f"Processing chunk {chunk_index} with Nvidia AI...")
359
  completion = client.chat.completions.create(
360
+ model="deepseek-ai/deepseek-v3.2",
361
  messages=[{"role":"user","content":prompt}],
362
  temperature=0.2,
363
  max_tokens=1024,
backend/main.py ADDED
@@ -0,0 +1,485 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Viral Clip Extractor API - FastAPI version for Hugging Face
3
+ """
4
+ import os
5
+ import re
6
+ import json
7
+ import logging
8
+ import shutil
9
+ import tempfile
10
+ import hashlib
11
+ import uuid
12
+ from typing import List, Dict, Optional
13
+ from concurrent.futures import ThreadPoolExecutor, as_completed
14
+
15
+ from fastapi import FastAPI, HTTPException, Query, Request
16
+ from fastapi.responses import JSONResponse, FileResponse
17
+ from fastapi.middleware.cors import CORSMiddleware
18
+ import yt_dlp
19
+ import requests
20
+
21
+ # Try to import AI clients
22
+ try:
23
+ from openai import OpenAI
24
+ HAS_OPENAI = True
25
+ except ImportError:
26
+ HAS_OPENAI = False
27
+
28
+ try:
29
+ import google.generativeai as genai
30
+ HAS_GEMINI = True
31
+ except ImportError:
32
+ HAS_GEMINI = False
33
+
34
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI app
app = FastAPI(
    title="Viral Clip Extractor API",
    description="Extract viral clips from YouTube videos using AI",
    version="2.0.0",
)

# CORS: the API is open to every origin. Credentials are disabled because a
# wildcard origin combined with allow_credentials=True lets any website make
# credentialed cross-origin requests; the endpoints in this file take their
# API keys as query parameters and do not read cookies.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Directory where cut clips are written and cached; created at import time.
DOWNLOAD_DIR = os.environ.get("DOWNLOAD_DIR", "/app/downloads")
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Resolve ffmpeg once at import time; /health reports whether it was found.
FFMPEG_PATH = shutil.which("ffmpeg")
if FFMPEG_PATH:
    logger.info(f"FFmpeg found at: {FFMPEG_PATH}")
else:
    logger.warning("FFmpeg not found in PATH!")
64
+
65
+
66
def extract_video_id(url: str) -> Optional[str]:
    """Extract the 11-character YouTube video ID from a URL.

    Recognised shapes are a ``v=<id>`` query parameter, ``youtu.be/<id>``,
    and the ``/embed/``, ``/shorts/``, ``/live/`` and ``/v/`` path forms.
    The ID must be exactly 11 characters long (not the prefix of a longer
    token), so host names and unrelated path segments are never mistaken
    for an ID.

    Args:
        url: The URL (or URL fragment) to inspect.

    Returns:
        The video ID, or ``None`` when no ID can be found.
    """
    if not url:
        return None

    id_chars = r'[0-9A-Za-z_-]'
    patterns = (
        # Query-parameter form: ...watch?v=<id> or ...&v=<id> (or a bare "v=<id>").
        rf'(?:^|[?&])v=({id_chars}{{11}})(?!{id_chars})',
        # Path forms: youtu.be/<id>, /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>.
        rf'(?:youtu\.be/|/(?:embed|shorts|live|v)/)({id_chars}{{11}})(?!{id_chars})',
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
78
+
79
+
80
# Cue timing line: "<start> --> <end>", each either H:MM:SS.mmm or MM:SS.mmm.
# Both '.' and ',' are accepted as the decimal separator.
_VTT_CUE_TIMING = re.compile(
    r'((?:\d+:)?\d+:\d+[.,]\d+)\s*-->\s*((?:\d+:)?\d+:\d+[.,]\d+)'
)
# Inline markup such as <c> or <00:00:01.500> embedded in cue text.
_VTT_TAG = re.compile(r'<[^>]+>')


def _vtt_timestamp_to_seconds(timestamp: str) -> float:
    """Convert an ``[H:]MM:SS.mmm`` timestamp into seconds."""
    seconds = 0.0
    for part in timestamp.replace(',', '.').split(':'):
        seconds = seconds * 60 + float(part)
    return seconds


def parse_vtt_content(content: str) -> List[Dict]:
    """Parse VTT subtitle content into segments.

    Every cue (a timing line followed by text lines up to the next blank line
    or timing line) becomes one segment. Markup tags are stripped from the
    text and the cue's text lines are joined with single spaces. Cues whose
    joined text is empty are dropped.

    Args:
        content: Raw subtitle file text.

    Returns:
        A list of ``{'start': float, 'end': float, 'text': str}`` dicts, with
        times in seconds, in file order.
    """
    segments = []
    lines = content.strip().split('\n')
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        timing = _VTT_CUE_TIMING.match(line) if '-->' in line else None
        if timing is None:
            i += 1
            continue

        start = _vtt_timestamp_to_seconds(timing.group(1))
        end = _vtt_timestamp_to_seconds(timing.group(2))

        # Collect the cue payload: everything up to a blank line or the next
        # timing line.
        text_lines = []
        i += 1
        while i < len(lines) and lines[i].strip() and '-->' not in lines[i]:
            text_lines.append(_VTT_TAG.sub('', lines[i].strip()))
            i += 1

        text = ' '.join(text_lines)
        if text:
            segments.append({'start': start, 'end': end, 'text': text})
    return segments
114
+
115
+
116
class ViralClipExtractor:
    """Fetch YouTube metadata and transcripts, rank clip candidates with an LLM, and cut clips."""

    # Transcript span (seconds) after which a new chunk is started for the model.
    CHUNK_SECONDS = 300
    # Maximum number of clips returned by one analysis pass.
    MAX_CLIPS = 10

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        # Options shared by every yt-dlp invocation made by this class.
        self.base_ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'socket_timeout': 30,
            'retries': 3,
            'nocheckcertificate': True,
            'http_headers': self.headers,
        }

    # ------------------------------------------------------------------
    # Pure helpers (no I/O)
    # ------------------------------------------------------------------

    @staticmethod
    def _format_segments(segments: List[Dict]) -> str:
        """Render segments as ``[<start>s] <text>`` joined by single spaces."""
        return " ".join(f"[{s['start']:.1f}s] {s['text']}" for s in segments)

    @staticmethod
    def _chunk_segments(segments: List[Dict], chunk_seconds: float) -> List[List[Dict]]:
        """Group consecutive segments into chunks spanning about ``chunk_seconds`` each.

        A new chunk begins when a segment starts more than ``chunk_seconds``
        after the start of the current chunk.
        """
        chunks = []
        current = []
        current_start = 0
        for seg in segments:
            if seg['start'] - current_start > chunk_seconds and current:
                chunks.append(current)
                current = []
                current_start = seg['start']
            current.append(seg)
        if current:
            chunks.append(current)
        return chunks

    @staticmethod
    def _parse_clips_json(text: Optional[str]) -> List[Dict]:
        """Parse a model reply into a list of clip dicts.

        ``<think>...</think>`` sections and Markdown code fences are removed
        first. If the remainder is not valid JSON, the outermost ``[...]``
        span is tried instead. Anything that is not a dict is discarded so
        callers can safely use ``.get`` on every item.

        Raises:
            json.JSONDecodeError: when no JSON can be decoded from the reply.
        """
        if not text:
            return []
        text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
        text = re.sub(r"```(?:json)?\s*", "", text).strip()
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            # The model may wrap the array in prose; retry on the bracketed span.
            first, last = text.find('['), text.rfind(']')
            if first == -1 or last <= first:
                raise
            data = json.loads(text[first:last + 1])

        if isinstance(data, dict):
            # A single clip object returned without the enclosing array.
            data = [data] if 'start' in data and 'end' in data else []
        if not isinstance(data, list):
            return []
        return [item for item in data if isinstance(item, dict)]

    @staticmethod
    def _viral_score(clip: Dict) -> float:
        """Return the clip's ``viral_score`` as a float, or 0.0 when missing or non-numeric."""
        try:
            return float(clip.get('viral_score', 0))
        except (TypeError, ValueError):
            return 0.0

    @staticmethod
    def _pick_subtitle_url(info: Dict) -> Optional[str]:
        """Choose an English subtitle URL from a yt-dlp info dict.

        Manual subtitles are preferred over automatic captions. Within a
        source, a ``vtt`` track is preferred, then ``json3`` (the two formats
        the transcript parser handles), then the last listed track.
        """
        for source in ('subtitles', 'automatic_captions'):
            tracks = (info.get(source) or {}).get('en')
            if not tracks:
                continue
            for wanted_ext in ('vtt', 'json3'):
                for track in tracks:
                    if track.get('ext') == wanted_ext and track.get('url'):
                        return track['url']
            fallback = tracks[-1].get('url')
            if fallback:
                return fallback
        return None

    @staticmethod
    def _parse_json_events(data) -> List[Dict]:
        """Convert an ``events``/``segs`` JSON subtitle payload into segments.

        Events without ``segs`` and events whose text is only whitespace are
        skipped.
        """
        segments = []
        if not isinstance(data, dict):
            return segments
        for event in data.get('events') or []:
            if 'segs' not in event:
                continue
            text = ''.join(seg.get('utf8', '') for seg in event['segs'])
            if not text.strip():
                continue
            start_time = event.get('tStartMs', 0) / 1000
            duration = event.get('dDurationMs', 0) / 1000
            segments.append({
                'start': start_time,
                'end': start_time + duration,
                'text': text,
            })
        return segments

    # ------------------------------------------------------------------
    # Network-facing operations
    # ------------------------------------------------------------------

    def extract_video_info(self, url: str) -> Dict:
        """Extract basic video information.

        Returns:
            A dict with ``title``, ``duration``, ``thumbnail``, ``channel``
            and ``view_count``; an empty dict when extraction fails.
        """
        logger.info(f"Extracting video info for: {url}")
        try:
            with yt_dlp.YoutubeDL(self.base_ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
                return {
                    'title': info.get('title'),
                    'duration': info.get('duration'),
                    'thumbnail': info.get('thumbnail'),
                    'channel': info.get('channel'),
                    'view_count': info.get('view_count'),
                }
        except Exception as e:
            logger.error(f"Error extracting video info: {e}")
            return {}

    def fetch_full_transcript(self, url: str) -> List[Dict]:
        """Fetch the English transcript of a YouTube video.

        Returns:
            A list of ``{'start', 'end', 'text'}`` segments (times in
            seconds); an empty list when no English subtitles exist or any
            step fails.
        """
        logger.info(f"Fetching transcript for: {url}")
        ydl_opts = {
            **self.base_ydl_opts,
            'skip_download': True,
            'writesubtitles': True,
            'writeautomaticsub': True,
            'subtitleslangs': ['en'],
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)

            sub_url = self._pick_subtitle_url(info)
            if not sub_url:
                logger.warning("No English subtitles found")
                return []

            logger.info(f"Found subtitle URL: {sub_url[:80]}...")
            response = requests.get(sub_url, headers=self.headers, timeout=15)
            response.raise_for_status()
            sub_text = response.text
            logger.info(f"Downloaded {len(sub_text)} chars of subtitle")

            if 'vtt' in sub_url or sub_text.startswith('WEBVTT'):
                segments = parse_vtt_content(sub_text)
            else:
                # Try JSON format, falling back to VTT parsing.
                try:
                    segments = self._parse_json_events(json.loads(sub_text))
                except json.JSONDecodeError:
                    segments = parse_vtt_content(sub_text)

            logger.info(f"Parsed {len(segments)} transcript segments")
            return segments

        except Exception as e:
            # Best-effort contract: callers treat an empty list as "no transcript".
            logger.exception(f"Transcript error: {e}")
            return []

    def analyze_with_nvidia(self, transcript_segments: List[Dict], api_key: str) -> List[Dict]:
        """Analyze a transcript using the Nvidia-hosted DeepSeek chat API.

        The transcript is split into chunks, each chunk is sent to the model
        on a worker thread, and the returned clips are merged and ranked by
        ``viral_score``.

        Returns:
            Up to ``MAX_CLIPS`` clip dicts, highest score first. Chunks that
            fail are logged and contribute no clips.

        Raises:
            ValueError: if the ``openai`` package is not installed.
        """
        if not HAS_OPENAI:
            raise ValueError("OpenAI package not installed")

        if not transcript_segments:
            return []

        chunks = self._chunk_segments(transcript_segments, self.CHUNK_SECONDS)

        # One client is shared by all worker threads instead of one per chunk.
        try:
            client = OpenAI(
                base_url="https://integrate.api.nvidia.com/v1",
                api_key=api_key,
            )
        except Exception as e:
            logger.error(f"Nvidia client error: {e}")
            return []

        def process_chunk(chunk_data):
            chunk_index, chunk = chunk_data
            chunk_text = self._format_segments(chunk)

            prompt = (
                "Analyze this transcript section and identify 1-2 viral-worthy clips.\n"
                "Each clip should be 15-60 seconds and have high viral potential.\n"
                "\n"
                "Return JSON array:\n"
                '[{"start": <seconds>, "end": <seconds>, "viral_score": <1-100>, "reason": "<why viral>"}]\n'
                "\n"
                "Transcript:\n"
                f"{chunk_text}\n"
                "\n"
                "JSON response only:"
            )

            try:
                completion = client.chat.completions.create(
                    model="deepseek-ai/deepseek-r1-distill-llama-70b",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.2,
                    max_tokens=1024,
                )
                return self._parse_clips_json(completion.choices[0].message.content)
            except Exception as e:
                logger.error(f"Chunk {chunk_index} error: {e}")
                return []

        # Process chunks in parallel; map() keeps results in chunk order so
        # equal-score clips rank deterministically.
        all_clips = []
        with ThreadPoolExecutor(max_workers=5) as executor:
            for result in executor.map(process_chunk, enumerate(chunks)):
                all_clips.extend(result)

        all_clips.sort(key=self._viral_score, reverse=True)
        return all_clips[:self.MAX_CLIPS]

    def analyze_with_gemini(self, transcript_segments: List[Dict], api_key: str) -> List[Dict]:
        """Analyze a transcript using the Gemini API.

        Only the first 500 segments are sent, in a single request.

        Returns:
            Up to ``MAX_CLIPS`` clip dicts, highest ``viral_score`` first; an
            empty list when the request or parsing fails.

        Raises:
            ValueError: if the ``google.generativeai`` package is not installed.
        """
        if not HAS_GEMINI:
            raise ValueError("Google Generative AI package not installed")

        if not transcript_segments:
            return []

        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-flash')

        chunk_text = self._format_segments(transcript_segments[:500])

        prompt = (
            "Analyze this transcript and identify 3-5 viral-worthy clips.\n"
            "Each clip should be 15-60 seconds with high engagement potential.\n"
            "\n"
            "Return JSON array only:\n"
            '[{"start": <seconds>, "end": <seconds>, "viral_score": <1-100>, "reason": "<why viral>"}]\n'
            "\n"
            "Transcript:\n"
            f"{chunk_text}\n"
            "\n"
            "JSON:"
        )

        try:
            response = model.generate_content(prompt)
            clips = self._parse_clips_json(response.text)
            clips.sort(key=self._viral_score, reverse=True)
            return clips[:self.MAX_CLIPS]
        except Exception as e:
            logger.error(f"Gemini error: {e}")
            return []

    def download_clip(self, url: str, start: float, end: float, quality: str = "480") -> Optional[str]:
        """Download the ``[start, end]`` section of a video and cache it on disk.

        The clip is cached as ``<video_id>_<int(start)>_<int(end)>.mp4`` in
        ``DOWNLOAD_DIR``; an existing cache file is returned without
        downloading. The download is written under a unique temporary name
        and only that file is moved into place, so other cached clips of the
        same video are never touched.

        Args:
            url: YouTube video URL.
            start: Clip start in seconds.
            end: Clip end in seconds; must be greater than ``start``.
            quality: Maximum video height (digits only); anything else falls
                back to ``"480"``.

        Returns:
            Path of the clip file, or ``None`` on any failure.
        """
        video_id = extract_video_id(url)
        if not video_id:
            return None

        if start < 0 or end <= start:
            logger.error(f"Download error: invalid clip range {start}-{end}")
            return None

        # `quality` is spliced into yt-dlp's format selector, so only plain
        # digits are accepted.
        quality = str(quality)
        if not quality.isdigit():
            logger.warning(f"Invalid quality {quality!r}; falling back to 480")
            quality = "480"

        output_file = os.path.join(DOWNLOAD_DIR, f"{video_id}_{int(start)}_{int(end)}.mp4")
        if os.path.exists(output_file):
            return output_file

        format_str = f"bestvideo[height<={quality}]+bestaudio/best[height<={quality}]/best"
        # Unique per-call prefix: identifies exactly the file(s) this download produced.
        job_prefix = f"tmp_{uuid.uuid4().hex}"
        output_template = os.path.join(DOWNLOAD_DIR, f"{job_prefix}.%(ext)s")

        ydl_opts = {
            **self.base_ydl_opts,
            'format': format_str,
            'outtmpl': output_template,
            'download_ranges': lambda info, ydl: [{'start_time': start, 'end_time': end}],
            'force_keyframes_at_cuts': True,
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])

            finished = [
                name for name in os.listdir(DOWNLOAD_DIR)
                if name.startswith(job_prefix) and not name.endswith(('.part', '.ytdl'))
            ]
            if not finished:
                return None

            os.replace(os.path.join(DOWNLOAD_DIR, finished[0]), output_file)
            return output_file

        except Exception as e:
            logger.error(f"Download error: {e}")
            return None

        finally:
            # Remove whatever this call left behind (partial or extra files).
            try:
                leftovers = [n for n in os.listdir(DOWNLOAD_DIR) if n.startswith(job_prefix)]
            except OSError:
                leftovers = []
            for name in leftovers:
                try:
                    os.remove(os.path.join(DOWNLOAD_DIR, name))
                except OSError:
                    # Cleanup is best-effort; a stray temp file is harmless.
                    pass
365
+
366
+
367
+ # Global extractor instance
368
+ extractor = ViralClipExtractor()
369
+
370
+
371
@app.get("/")
async def root():
    """Return the static service banner: status, API name and version."""
    banner = {
        "status": "ok",
        "message": "Viral Clip Extractor API",
        "version": "2.0.0",
    }
    return banner
374
+
375
+
376
@app.get("/health")
async def health():
    """Report liveness and whether an ffmpeg executable was found at startup."""
    ffmpeg_available = FFMPEG_PATH is not None
    return {"status": "healthy", "ffmpeg": ffmpeg_available}
379
+
380
+
381
@app.get("/clips")
async def get_clips(
    url: str = Query(..., description="YouTube video URL"),
    mode: str = Query("nvidia", description="Analysis mode: nvidia, gemini, or heuristic"),
    nvidia_key: Optional[str] = Query(None, description="Nvidia API key"),
    gemini_key: Optional[str] = Query(None, description="Gemini API key"),
    api_key: Optional[str] = Query(None, description="Generic API key"),
    num: int = Query(5, description="Number of clips to return"),
):
    """Extract viral clips from a YouTube video.

    The API key is chosen according to ``mode`` so that a key for one
    provider is never sent to the other: the provider-specific query
    parameter first, then the generic ``api_key``, then the provider's
    environment variable (``GEMINI_API_KEY`` / ``NVIDIA_API_KEY``).

    When no transcript or no key is available, or ``mode`` is neither
    ``nvidia`` nor ``gemini``, the response is successful with an empty
    ``clips`` list.

    Raises:
        HTTPException: 400 when no video ID can be extracted from ``url``.
    """
    import asyncio

    try:
        video_id = extract_video_id(url)
        if not video_id:
            raise HTTPException(status_code=400, detail="Invalid YouTube URL")

        # Get the API key that belongs to the selected provider.
        if mode == "gemini":
            key = gemini_key or api_key or os.environ.get("GEMINI_API_KEY")
        else:
            key = nvidia_key or api_key or os.environ.get("NVIDIA_API_KEY")

        # The extractor does blocking network I/O; run it on worker threads
        # so the event loop keeps serving other requests.
        video_info = await asyncio.to_thread(extractor.extract_video_info, url)
        transcript = await asyncio.to_thread(extractor.fetch_full_transcript, url)

        clips = []
        if transcript and key:
            if mode == "nvidia":
                clips = await asyncio.to_thread(extractor.analyze_with_nvidia, transcript, key)
            elif mode == "gemini":
                clips = await asyncio.to_thread(extractor.analyze_with_gemini, transcript, key)

        # A negative `num` would otherwise slice from the end of the list.
        selected = clips[:max(num, 0)]

        return {
            "success": True,
            "video_id": video_id,
            "video_title": video_info.get("title"),
            "video_duration": video_info.get("duration"),
            "mode": mode,
            "clips": selected,
            "clips_count": len(selected),
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.exception(f"Clips error: {e}")
        return JSONResponse(
            status_code=500,
            content={"success": False, "error": str(e)}
        )
432
+
433
+
434
@app.get("/debug_video")
async def debug_video(
    url: str = Query(..., description="YouTube video URL"),
):
    """Debug endpoint to check transcript fetching.

    Returns the extracted video ID, title, duration, the number of
    transcript segments and the first five segments. On an unexpected error
    the response is a 500 whose body carries the error text and traceback.
    """
    import asyncio
    import traceback

    try:
        video_id = extract_video_id(url)
        # Blocking yt-dlp/HTTP work runs on worker threads, off the event loop.
        video_info = await asyncio.to_thread(extractor.extract_video_info, url)
        transcript = await asyncio.to_thread(extractor.fetch_full_transcript, url)

        return {
            "video_id": video_id,
            "video_title": video_info.get("title"),
            "video_duration": video_info.get("duration"),
            "transcript_segments_count": len(transcript),
            "transcript_sample": transcript[:5] if transcript else [],
        }
    except Exception as e:
        logger.exception(f"Debug error: {e}")
        return JSONResponse(
            status_code=500,
            content={"error": str(e), "traceback": traceback.format_exc()}
        )
457
+
458
+
459
@app.get("/download")
async def download_clip(
    url: str = Query(..., description="YouTube video URL"),
    start: float = Query(..., description="Start time in seconds"),
    end: float = Query(..., description="End time in seconds"),
    quality: str = Query("480", description="Video quality"),
):
    """Download a specific clip and return it as an MP4 file.

    Raises:
        HTTPException: 400 when ``start`` is negative or ``end`` is not
            greater than ``start``; 404 when the clip could not be produced;
            500 on an unexpected error.
    """
    import asyncio

    if start < 0 or end <= start:
        raise HTTPException(
            status_code=400,
            detail="start must be >= 0 and end must be greater than start",
        )

    try:
        # Downloading and cutting is blocking work; keep it off the event loop.
        clip_path = await asyncio.to_thread(extractor.download_clip, url, start, end, quality)
        if clip_path and os.path.exists(clip_path):
            return FileResponse(
                clip_path,
                media_type="video/mp4",
                filename=os.path.basename(clip_path)
            )
        raise HTTPException(status_code=404, detail="Failed to download clip")
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Download error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
481
+
482
+
483
# Direct execution (python main.py): serve the app on all interfaces, port 7860.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
backend/requirements.txt CHANGED
@@ -1,35 +1,14 @@
1
- # Viral Clip Extractor API - Dependencies
2
- # Python 3.9+
3
-
4
- # Web Framework
5
- flask==3.0.3
6
- flask-cors==4.0.1
7
- werkzeug==3.0.3
8
-
9
- # YouTube Download
10
- yt-dlp>=2024.12.23
11
- google-generativeai==0.7.2
12
- openai==1.61.0 # For Nvidia API
13
- gunicorn==21.2.0 # Production Server
14
-
15
- # HTTP Requests
16
- requests==2.32.3
17
- urllib3==2.2.2
18
- certifi==2024.7.4
19
- charset-normalizer==3.3.2
20
- idna==3.7
21
-
22
- # Data Processing
23
- # (Using built-in json, re, typing modules)
24
-
25
- # Serverless Support
26
- # python-dateutil is often useful
27
- python-dateutil==2.8.2
28
-
29
- # Optional: For better JSON handling
30
- # (Using built-in json module)
31
-
32
- # Development dependencies (not needed for deployment)
33
- # pytest==8.3.2
34
- # black==24.8.0
35
- # flake8==7.1.1
 
1
+ # Core dependencies
2
+ fastapi>=0.104.0
3
+ uvicorn[standard]>=0.24.0
4
+ yt-dlp>=2024.1.1
5
+ requests>=2.31.0
6
+ httpx>=0.25.0
7
+
8
+ # AI clients
9
+ openai>=1.6.0
10
+ google-generativeai>=0.3.0
11
+
12
+ # Additional utilities
13
+ python-multipart>=0.0.6
14
+ aiofiles>=23.2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/test_fastapi.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import json
3
+ from main import ViralClipExtractor, extract_video_id
4
+
5
+ logging.basicConfig(level=logging.INFO)
6
+
7
def test():
    """Manual end-to-end smoke test: video info, transcript, then AI analysis.

    The Nvidia API key is read from the ``NVIDIA_API_KEY`` environment
    variable; secrets must never be committed to source control. When the
    variable is unset, the AI analysis step is skipped and no clips are
    reported.

    NOTE(review): a live key was previously hard-coded here and is part of
    the repository history; it should be revoked.
    """
    import os

    url = "https://youtu.be/uVkFrqugXFQ"
    nvidia_key = os.environ.get("NVIDIA_API_KEY")

    print("=== FASTAPI LOCAL TEST ===")
    print(f"URL: {url}")

    extractor = ViralClipExtractor()

    # 1. Video Info
    print("\n1. Fetching Video Info...")
    video_info = extractor.extract_video_info(url)
    print(f"   Title: {video_info.get('title')}")
    print(f"   Duration: {video_info.get('duration')} seconds")

    # 2. Transcript
    print("\n2. Fetching Transcript...")
    transcript = extractor.fetch_full_transcript(url)
    print(f"   Transcript Segments: {len(transcript)}")
    if transcript:
        print(f"   Sample: {transcript[0]}")
    else:
        print("   [FAIL] No transcript found!")
        return

    # 3. AI Analysis
    print("\n3. Running Nvidia AI Analysis...")
    if nvidia_key:
        clips = extractor.analyze_with_nvidia(transcript, nvidia_key)
    else:
        print("   [SKIP] NVIDIA_API_KEY is not set; skipping AI analysis.")
        clips = []
    print(f"   Clips Found: {len(clips)}")

    # 4. Final Response
    response = {
        "success": True,
        "video_id": extract_video_id(url),
        "video_title": video_info.get('title'),
        "video_duration": video_info.get('duration'),
        "mode": "nvidia",
        "clips": clips,
        "clips_count": len(clips)
    }

    print("\n=== FINAL RESPONSE ===")
    print(json.dumps(response, indent=2))
50
+
51
+ if __name__ == "__main__":
52
+ test()