Ali Hashhash committed on
Commit
226ed2d
·
1 Parent(s): a5568e2

update time

Browse files
Files changed (1) hide show
  1. src/api/notes_routes.py +46 -1
src/api/notes_routes.py CHANGED
@@ -2,6 +2,7 @@ import uuid
2
  import re
3
  import logging
4
  import os
 
5
  from datetime import datetime
6
  from typing import List, Optional, Dict
7
  from pathlib import Path
@@ -26,6 +27,46 @@ router = APIRouter(tags=["Notes"])
26
  # مخزن المهام المؤقت في الذاكرة
27
  tasks: Dict[str, Dict] = {}
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # --- Models ---
30
  class GenerateNotesRequest(BaseModel):
31
  youtube_url: HttpUrl
@@ -114,11 +155,15 @@ async def process_video_task(task_id: str, youtube_url: str, language: str, user
114
  "action_items": action_items.get("action_items", [])
115
  }
116
 
 
 
 
 
117
  final_markdown = note_gen.format_final_notes(
118
  note_gen.format_notes_to_markdown(combined_notes),
119
  video_title,
120
  youtube_url,
121
- 0,
122
  )
123
 
124
  # الخطوة 3: الحفظ في Firebase
 
2
  import re
3
  import logging
4
  import os
5
+ import urllib.request
6
  from datetime import datetime
7
  from typing import List, Optional, Dict
8
  from pathlib import Path
 
27
  # مخزن المهام المؤقت في الذاكرة
28
  tasks: Dict[str, Dict] = {}
29
 
30
+
31
+ # ==========================================
32
+ # ⏱️ YouTube Duration Scraper (stdlib only)
33
+ # ==========================================
34
+
35
def get_youtube_duration(url: str) -> int:
    """Return the duration of a YouTube video in seconds, or 0 on failure.

    Downloads the video page and scrapes the ``"lengthSeconds":"<n>"`` value
    embedded in its HTML. Only the Python standard library is used.

    The URL is validated before any network access: only ``http``/``https``
    URLs whose host is a YouTube domain are fetched. ``urlopen`` would
    otherwise also accept schemes such as ``file:`` and arbitrary hosts,
    which is unsafe for a caller-supplied URL.

    This is a best-effort lookup: every failure (rejected URL, network error,
    timeout, missing field) is logged as a warning and reported as 0 rather
    than raised.

    NOTE(review): the request is synchronous (up to the 10 s timeout); when
    called from a coroutine it blocks the event loop — consider running it in
    a worker thread.

    Args:
        url: Address of the YouTube video page.

    Returns:
        The video length in whole seconds, or 0 if it could not be determined.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    # Upper bound on how much of the page is downloaded and scanned.
    max_page_bytes = 5 * 1024 * 1024
    youtube_domains = ("youtube.com", "youtube-nocookie.com", "youtu.be")

    try:
        url = str(url)
        parts = urlsplit(url)
        host = (parts.hostname or "").lower()
        is_youtube_host = any(
            host == domain or host.endswith("." + domain)
            for domain in youtube_domains
        )
        if parts.scheme not in ("http", "https") or not is_youtube_host:
            logger.warning("⚠️ Refusing to fetch non-YouTube URL: %r", url)
            return 0

        req = urllib.request.Request(
            url,
            headers={
                # A browser-like User-Agent is sent with the page request.
                "User-Agent": (
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/124.0.0.0 Safari/537.36"
                )
            },
        )
        with urllib.request.urlopen(req, timeout=10) as response:
            # Bounded read: never buffer more than max_page_bytes in memory.
            html = response.read(max_page_bytes).decode("utf-8", errors="ignore")

        # YouTube embeds duration in the page as: "lengthSeconds":"<value>"
        match = re.search(r'"lengthSeconds"\s*:\s*"(\d+)"', html)
        if match:
            duration = int(match.group(1))
            logger.info("⏱️ Extracted video duration: %ss", duration)
            return duration

        logger.warning("⚠️ lengthSeconds not found in YouTube page HTML.")
        return 0
    except Exception as e:
        # Deliberately broad: the contract is "0 on any failure", so a
        # metadata lookup can never abort the surrounding task.
        logger.warning("⚠️ Could not fetch YouTube duration: %s", e)
        return 0
69
+
70
  # --- Models ---
71
  class GenerateNotesRequest(BaseModel):
72
  youtube_url: HttpUrl
 
155
  "action_items": action_items.get("action_items", [])
156
  }
157
 
158
+ # استخراج مدة الفيديو الحقيقية بدلاً من الصفر المبرمج
159
+ tasks[task_id]["message"] = "Fetching video metadata..."
160
+ video_duration = get_youtube_duration(youtube_url)
161
+
162
  final_markdown = note_gen.format_final_notes(
163
  note_gen.format_notes_to_markdown(combined_notes),
164
  video_title,
165
  youtube_url,
166
+ video_duration,
167
  )
168
 
169
  # الخطوة 3: الحفظ في Firebase