Ali Hashhash committed on
Commit
fcaa56a
·
1 Parent(s): e204a8a
Files changed (2) hide show
  1. src/api/main.py +1 -186
  2. src/api/notes_routes.py +34 -14
src/api/main.py CHANGED
@@ -1,67 +1,19 @@
1
- import uuid
2
  from datetime import datetime
3
- from typing import Dict
4
- from enum import Enum
5
  from contextlib import asynccontextmanager
6
 
7
- from fastapi import FastAPI, HTTPException, BackgroundTasks, Depends
8
  from fastapi.middleware.cors import CORSMiddleware
9
- from pydantic import BaseModel, HttpUrl
10
 
11
  # POT Server and Routers
12
  from src.api.pot_server import pot_server
13
- from src.auth.dependencies import get_current_user
14
  from src.api.auth_routes import router as auth_router
15
  from src.api.notes_routes import router as notes_router
16
  from src.api.recommendation_routes import router as recommendation_router
17
  from src.utils.logger import setup_logger
18
 
19
- from src.transcription.transcript_fetcher import TranscriptFetcher
20
- from src.transcription.whisper_transcriber import WhisperTranscriber
21
- from src.summarization.note_generator import NoteGenerator
22
- from src.api.downloader import YouTubeDownloader
23
- from src.db.firebase import get_firebase_db
24
- from src.db.models import Note, User
25
-
26
  logger = setup_logger(__name__)
27
 
28
 
29
- # --- Models ---
30
- class TaskStatus(str, Enum):
31
- PENDING = "pending"
32
- DOWNLOADING = "downloading"
33
- TRANSCRIBING = "transcribing"
34
- GENERATING_NOTES = "generating_notes"
35
- COMPLETED = "completed"
36
- FAILED = "failed"
37
-
38
-
39
- class GenerateNotesRequest(BaseModel):
40
- youtube_url: HttpUrl
41
- language: str = "en"
42
-
43
-
44
- class TaskResponse(BaseModel):
45
- task_id: str
46
- status: TaskStatus
47
- message: str
48
-
49
-
50
- class TaskStatusResponse(BaseModel):
51
- status: TaskStatus
52
- message: str = ""
53
- youtube_url: str | None = None
54
- user_id: str | None = None
55
- created_at: datetime | None = None
56
- notes: str | None = None
57
- topics: list | None = []
58
- keyPoints: list | None = []
59
-
60
-
61
- # Global task storage
62
- tasks: Dict[str, Dict] = {}
63
-
64
-
65
  @asynccontextmanager
66
  async def lifespan(app: FastAPI):
67
  print("🚀 Lifespan: Starting POT solver server (bgutil v1.3.1)...")
@@ -142,140 +94,3 @@ async def health_check():
142
  "timestamp": datetime.now()
143
  }
144
 
145
-
146
- @app.post("/generate", response_model=TaskResponse)
147
- async def generate(
148
- request: GenerateNotesRequest,
149
- background_tasks: BackgroundTasks,
150
- current_user: User = Depends(get_current_user),
151
- ):
152
- task_id = str(uuid.uuid4())
153
- user_id = current_user.id
154
-
155
- tasks[task_id] = {
156
- "status": TaskStatus.PENDING,
157
- "message": "Initializing...",
158
- "youtube_url": str(request.youtube_url),
159
- "user_id": user_id,
160
- "created_at": datetime.now(),
161
- }
162
-
163
- background_tasks.add_task(
164
- process_video_and_save,
165
- task_id,
166
- str(request.youtube_url),
167
- request.language,
168
- user_id,
169
- )
170
-
171
- return TaskResponse(
172
- task_id=task_id,
173
- status=TaskStatus.PENDING,
174
- message="Generation started successfully.",
175
- )
176
-
177
-
178
- async def transcribeAudio(task_id: str, youtube_url: str, language: str):
179
- audio_file = None
180
- downloader = YouTubeDownloader()
181
- try:
182
- video_info = downloader.get_video_info(youtube_url)
183
- except Exception as e:
184
- logger.warning(f"Metadata extraction failed with yt-dlp: {e}. Trying fallback.")
185
- video_info = {
186
- "title": "YouTube Video",
187
- "duration": 0
188
- }
189
-
190
- tasks[task_id]["status"] = TaskStatus.TRANSCRIBING
191
- fetcher = TranscriptFetcher()
192
- transcript_text = fetcher.fetch_transcript(youtube_url, languages=[language, 'en'])
193
-
194
- if transcript_text:
195
- logger.info("Using direct YouTube transcript (v8)")
196
- else:
197
- logger.info("Direct transcript failed. Falling back to audio download + Whisper.")
198
- tasks[task_id]["status"] = TaskStatus.DOWNLOADING
199
- audio_file = downloader.download_audio(youtube_url, task_id)
200
-
201
- tasks[task_id]["status"] = TaskStatus.TRANSCRIBING
202
- transcriber = WhisperTranscriber()
203
- transcript_data = transcriber.transcribe(audio_file, language=language)
204
- transcript_text = transcript_data["text"]
205
-
206
- return transcript_text, video_info, audio_file, downloader
207
-
208
-
209
- async def process_video_and_save(
210
- task_id: str, youtube_url: str, language: str, user_id: str
211
- ):
212
- audio_file = None
213
- downloader = None
214
- try:
215
- # 1. Transcribe Audio
216
- transcript_text, video_info, audio_file, downloader = await transcribeAudio(
217
- task_id, youtube_url, language
218
- )
219
-
220
- # 2. Generate Summary
221
- tasks[task_id]["status"] = TaskStatus.GENERATING_NOTES
222
- note_gen = NoteGenerator()
223
- summary_json = note_gen.generateSummary(transcript_text, video_info["title"])
224
-
225
- final_notes = note_gen.format_final_notes(
226
- note_gen.format_notes_to_markdown(summary_json),
227
- video_info["title"],
228
- youtube_url,
229
- video_info["duration"],
230
- detected_language=summary_json.get("detected_language", "English"),
231
- )
232
-
233
- # Extract key insights from segments for Flutter frontend
234
- segments = summary_json.get("segments", [])
235
- key_points_list = []
236
- for seg in segments:
237
- if isinstance(seg, dict) and seg.get("key_insight"):
238
- key_points_list.append(seg["key_insight"])
239
-
240
- # Extract video_id from URL for thumbnail
241
- import re
242
- video_id_match = re.search(r'(?:v=|/)([0-9A-Za-z_-]{11})', youtube_url)
243
- video_id = video_id_match.group(1) if video_id_match else ""
244
-
245
- db = get_firebase_db()
246
- if db:
247
- note_data = {
248
- "userId": user_id,
249
- "videoUrl": youtube_url,
250
- "videoTitle": video_info["title"],
251
- "notes": final_notes,
252
- "thumbnail": f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg" if video_id else "",
253
- "category": summary_json.get("topics", []),
254
- "keyPoints": key_points_list,
255
- "createdAt": datetime.utcnow(),
256
- "updatedAt": datetime.utcnow(),
257
- "isFavorite": False,
258
- }
259
- db.collection("notes").add(note_data)
260
- else:
261
- logger.warning("Firestore not initialized, note not saved to DB but generated in memory.")
262
-
263
- tasks[task_id]["notes"] = final_notes
264
- tasks[task_id]["topics"] = summary_json.get("topics", [])
265
- tasks[task_id]["keyPoints"] = key_points_list
266
- tasks[task_id]["status"] = TaskStatus.COMPLETED
267
- except Exception as e:
268
- error_msg = str(e)
269
- logger.error(f"Task failed: {error_msg}")
270
- tasks[task_id]["status"] = TaskStatus.FAILED
271
- tasks[task_id]["message"] = f"Error: {error_msg}"
272
- finally:
273
- if downloader and audio_file and audio_file.exists():
274
- downloader.cleanup(audio_file)
275
-
276
-
277
- @app.get("/status/{task_id}", response_model=TaskStatusResponse)
278
- async def get_task_status(task_id: str):
279
- if task_id not in tasks:
280
- raise HTTPException(status_code=404, detail="Task not found")
281
- return tasks[task_id]
 
 
1
  from datetime import datetime
 
 
2
  from contextlib import asynccontextmanager
3
 
4
+ from fastapi import FastAPI
5
  from fastapi.middleware.cors import CORSMiddleware
 
6
 
7
  # POT Server and Routers
8
  from src.api.pot_server import pot_server
 
9
  from src.api.auth_routes import router as auth_router
10
  from src.api.notes_routes import router as notes_router
11
  from src.api.recommendation_routes import router as recommendation_router
12
  from src.utils.logger import setup_logger
13
 
 
 
 
 
 
 
 
14
  logger = setup_logger(__name__)
15
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  @asynccontextmanager
18
  async def lifespan(app: FastAPI):
19
  print("🚀 Lifespan: Starting POT solver server (bgutil v1.3.1)...")
 
94
  "timestamp": datetime.now()
95
  }
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/api/notes_routes.py CHANGED
@@ -266,24 +266,23 @@ async def process_video_task(task_id: str, youtube_url: str, language: str, user
266
  downloader = YouTubeDownloader()
267
 
268
  try:
269
- # استخراج الـ Video ID للعرض فقط
270
  video_id_match = re.search(r"(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})", youtube_url)
271
- video_id = video_id_match.group(1) if video_id_match else "unknown"
272
  video_title = "YouTube Video"
273
 
274
- # الخطوة 1: استخدام نظام "الشلال" الموحد
275
- # الدالة دي جواها دلوقتي (YouTube API -> AssemblyAI -> yt-dlp)
276
  tasks[task_id]["status"] = "transcribing"
277
  tasks[task_id]["message"] = "Processing transcript through optimized pipeline..."
278
 
279
  transcript_text = downloader.get_transcript(youtube_url)
280
 
281
- # الخطوة 2: توليد الملاحظات بالـ AI
282
  tasks[task_id]["status"] = "generating_notes"
283
  note_gen = NoteGenerator()
284
  summary_json = note_gen.generateSummary(transcript_text, video_title)
285
 
286
- # استخراج مدة الفيديو الحقيقية بدلاً من الصفر المبرمج
287
  tasks[task_id]["message"] = "Fetching video metadata..."
288
  video_duration = get_youtube_duration(youtube_url)
289
 
@@ -295,19 +294,40 @@ async def process_video_task(task_id: str, youtube_url: str, language: str, user
295
  detected_language=summary_json.get("detected_language", "English"),
296
  )
297
 
298
- # الخطوة 3: الحفظ في Firebase
 
 
 
 
 
 
 
 
 
 
 
 
299
  db = get_firebase_db()
300
  if db:
301
- db.collection("notes").add({
302
- "user_id": user_id,
303
- "video_url": youtube_url,
304
- "video_title": video_title,
305
- "summary_content": final_markdown,
306
- "created_at": datetime.utcnow()
307
- })
 
 
 
 
 
 
 
308
 
309
  tasks[task_id]["status"] = "completed"
310
  tasks[task_id]["notes"] = final_markdown
 
 
311
  logger.info(f"✅ Task {task_id} completed successfully!")
312
 
313
  except Exception as e:
 
266
  downloader = YouTubeDownloader()
267
 
268
  try:
269
+ # Extract video ID for thumbnail
270
  video_id_match = re.search(r"(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})", youtube_url)
271
+ video_id = video_id_match.group(1) if video_id_match else ""
272
  video_title = "YouTube Video"
273
 
274
+ # Step 1: Transcribe
 
275
  tasks[task_id]["status"] = "transcribing"
276
  tasks[task_id]["message"] = "Processing transcript through optimized pipeline..."
277
 
278
  transcript_text = downloader.get_transcript(youtube_url)
279
 
280
+ # Step 2: Generate AI summary
281
  tasks[task_id]["status"] = "generating_notes"
282
  note_gen = NoteGenerator()
283
  summary_json = note_gen.generateSummary(transcript_text, video_title)
284
 
285
+ # Step 3: Fetch video duration
286
  tasks[task_id]["message"] = "Fetching video metadata..."
287
  video_duration = get_youtube_duration(youtube_url)
288
 
 
294
  detected_language=summary_json.get("detected_language", "English"),
295
  )
296
 
297
+ # Step 4: Extract key insights from segments
298
+ segments = summary_json.get("segments", [])
299
+ key_points_list = []
300
+ for seg in segments:
301
+ if isinstance(seg, dict) and seg.get("key_insight"):
302
+ key_points_list.append(seg["key_insight"])
303
+
304
+ # Step 5: Classify topics into predefined categories
305
+ from src.summarization.topic_classifier import classify_topics
306
+ raw_topics = summary_json.get("topics", [])
307
+ categories = classify_topics(raw_topics) if raw_topics else ["Education & Science"]
308
+
309
+ # Step 6: SINGLE Firestore write — unified camelCase schema matching note.txt
310
  db = get_firebase_db()
311
  if db:
312
+ note_data = {
313
+ "userId": user_id,
314
+ "videoUrl": youtube_url,
315
+ "videoTitle": video_title,
316
+ "notes": final_markdown,
317
+ "thumbnail": f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg" if video_id else "",
318
+ "category": categories,
319
+ "keyPoints": key_points_list,
320
+ "createdAt": datetime.utcnow(),
321
+ "updatedAt": datetime.utcnow(),
322
+ "isFavorite": False,
323
+ }
324
+ db.collection("notes").add(note_data)
325
+ logger.info(f"✅ Note saved to Firestore for user {user_id}")
326
 
327
  tasks[task_id]["status"] = "completed"
328
  tasks[task_id]["notes"] = final_markdown
329
+ tasks[task_id]["topics"] = categories
330
+ tasks[task_id]["keyPoints"] = key_points_list
331
  logger.info(f"✅ Task {task_id} completed successfully!")
332
 
333
  except Exception as e: