ALI7ADEL committed on
Commit
81618af
·
verified ·
1 Parent(s): 83615e5

Update src/api/notes_routes.py

Browse files
Files changed (1) hide show
  1. src/api/notes_routes.py +152 -191
src/api/notes_routes.py CHANGED
@@ -1,232 +1,193 @@
1
- """
2
- Notes management API endpoints.
3
- """
4
-
5
- from sqlmodel.ext.asyncio.session import AsyncSession
6
- from typing import List, Optional
7
- from pathlib import Path
8
  import os
 
 
 
9
 
10
- from fastapi import APIRouter, Depends, HTTPException, status, Query
11
- from fastapi.responses import FileResponse, JSONResponse
12
  from pydantic import BaseModel, HttpUrl, Field
13
- from sqlmodel import Session, select
14
 
15
  from src.db.firebase import get_firebase_db
16
  from src.db.models import User, Note
17
  from src.auth.dependencies import get_current_user
18
- from src.categorization.categorizer import CategorizationService
19
  from src.utils.logger import setup_logger
20
  from src.utils.config import settings
21
 
22
- logger = setup_logger(__name__)
23
- categorizer = CategorizationService()
 
 
 
24
 
 
25
  router = APIRouter(prefix="/notes", tags=["Notes"])
26
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- # --- New Models for File-based Notes ---
class GeneratedNoteFile(BaseModel):
    """Metadata describing one generated markdown note file on disk."""

    # Name of the file (the listing endpoint fills this from Path.name).
    filename: str = Field(...)
    # Human-readable title shown for the note.
    title: str = Field(...)
    # Timestamp as a float; the listing endpoint fills it from st_mtime.
    created_at: float = Field(...)
    # File size; the listing endpoint fills it from st_size.
    size: int = Field(...)
34
 
 
 
 
35
 
36
- # --- Existing Models ---
37
class CreateNoteRequest(BaseModel):
    """Request payload for storing a note generated from a video."""

    video_url: HttpUrl = Field(default=..., description="YouTube video URL")
    video_title: str = Field(
        default=...,
        max_length=500,
        description="Video title",
    )
    summary_text: str = Field(
        default=...,
        description="Generated study notes in markdown",
    )
    video_duration: Optional[int] = Field(
        default=None,
        description="Video duration in seconds",
    )
    language: str = Field(default="en", max_length=10, description="Video language code")
 
 
 
 
 
 
 
 
45
 
 
 
 
 
 
 
 
 
46
 
47
class NoteResponse(BaseModel):
    """Response shape for a note returned by the notes endpoints."""

    # String identifier (Firestore document IDs are strings).
    id: str = Field(...)
    video_url: str = Field(...)
    video_title: str = Field(...)
    summary_text: str = Field(...)
    # Left as a bare Optional annotation on purpose so pydantic's handling of
    # the missing default stays exactly as before.
    video_duration: Optional[int]
    language: str = Field(...)
    # String identifier of the owning user.
    user_id: str = Field(...)
    # Bare Optional annotation kept for the same reason as video_duration.
    category: Optional[str]
    # Creation time rendered as a string.
    created_at: str = Field(...)
57
 
 
 
 
 
 
 
58
 
59
  # ==========================================
60
- # NEW ENDPOINTS: Read from 'outputs' folder
61
  # ==========================================
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
@router.get("/generated", response_model=List[GeneratedNoteFile])
async def list_generated_notes():
    """List the markdown note files found in ``settings.output_dir``.

    Files are read straight from disk; the database is not consulted. Only
    files matching ``*_notes.md`` are considered. The title is taken from the
    first line of each file with leading markdown heading markers removed,
    falling back to the file name when that line is empty.

    Returns:
        A list of ``GeneratedNoteFile`` sorted by modification time, newest
        first. An empty list when the output directory does not exist.
    """
    output_dir = settings.output_dir
    if not output_dir.exists():
        return []

    notes = []
    for file_path in output_dir.glob("*_notes.md"):
        try:
            # Only the first line is needed for the title, so avoid loading
            # the whole note into memory.
            with file_path.open(encoding="utf-8") as handle:
                first_line = handle.readline()
            stats = file_path.stat()
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: an unreadable file is logged and skipped.
            logger.error(f"Error reading file {file_path}: {e}")
            continue

        # Strip only the leading heading markers so a "#" inside the title
        # itself (e.g. "C# Basics") is preserved.
        title = first_line.strip().lstrip("#").strip()

        notes.append(
            GeneratedNoteFile(
                filename=file_path.name,
                title=title or file_path.name,
                created_at=stats.st_mtime,
                size=stats.st_size,
            )
        )

    # Newest first.
    notes.sort(key=lambda x: x.created_at, reverse=True)
    return notes
104
 
105
-
@router.get("/generated/{filename}")
async def get_generated_note_content(filename: str):
    """Return the full text of one markdown file from ``settings.output_dir``.

    Args:
        filename: Bare file name of the note; it must not contain path
            separators or ``..``.

    Returns:
        A dict with the file's text under ``"content"`` and the requested
        name under ``"filename"``.

    Raises:
        HTTPException: 400 when the name looks like a path, 404 when no
            regular file with that name exists in the output directory.
    """
    # Prevent directory traversal. Backslashes are rejected as well because
    # they act as path separators on Windows.
    if ".." in filename or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    file_path = settings.output_dir / filename

    # is_file() rather than exists(): a directory with a matching name would
    # otherwise pass the check and fail inside read_text().
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="Note file not found")

    content = file_path.read_text(encoding="utf-8")
    return {"content": content, "filename": filename}
122
-
123
-
124
- # ==========================================
125
- # End of New Endpoints
126
- # ==========================================
127
-
128
-
129
@router.get("/{note_id}", response_model=NoteResponse)
async def get_note(
    note_id: str,
    current_user: User = Depends(get_current_user),
):
    """Fetch one note from the Firestore ``notes`` collection by its ID.

    Raises:
        HTTPException: 500 when ``get_firebase_db()`` returns ``None``, 404
            when the document does not exist, 403 when the stored ``user_id``
            differs from the current user's ID.
    """
    firestore = get_firebase_db()
    if firestore is None:
        raise HTTPException(status_code=500, detail="Firebase not configured")

    snapshot = firestore.collection("notes").document(note_id).get()
    if not snapshot.exists:
        raise HTTPException(status_code=404, detail="Note not found")

    fields = snapshot.to_dict()
    owner_id = fields.get("user_id")
    if owner_id != current_user.id:
        raise HTTPException(status_code=403, detail="Forbidden")

    # Duration and language are not read from the document; fixed values are
    # returned for them.
    return NoteResponse(
        id=snapshot.id,
        video_url=fields["video_url"],
        video_title=fields["video_title"],
        summary_text=fields["summary_content"],
        video_duration=None,
        language="en",
        user_id=fields["user_id"],
        category=fields.get("category"),
        created_at=str(fields.get("created_at")),
    )
162
-
163
-
164
@router.get("", response_model=List[NoteResponse])
async def list_user_notes(
    current_user: User = Depends(get_current_user),
):
    """Return the current user's notes from Firestore, newest first.

    An empty list is returned when ``get_firebase_db()`` yields ``None``.
    Documents whose ``to_dict()`` result is falsy are skipped.
    """
    firestore = get_firebase_db()
    if firestore is None:
        return []

    documents = (
        firestore.collection("notes")
        .where("user_id", "==", current_user.id)
        .order_by("created_at", direction="DESCENDING")
        .stream()
    )

    results = []
    for doc in documents:
        data = doc.to_dict()
        if not data:
            continue
        # Duration and language are not read from the document; fixed values
        # are returned for them.
        results.append(
            NoteResponse(
                id=doc.id,
                video_url=data["video_url"],
                video_title=data["video_title"],
                summary_text=data["summary_content"],
                video_duration=None,
                language="en",
                user_id=data["user_id"],
                category=data.get("category"),
                created_at=str(data.get("created_at")),
            )
        )
    return results
197
-
198
-
199
@router.post("", response_model=NoteResponse, status_code=status.HTTP_201_CREATED)
async def create_note(
    note_data: CreateNoteRequest,
    current_user: User = Depends(get_current_user),
):
    """Categorize a note and store it in the Firestore ``notes`` collection.

    Args:
        note_data: Validated request payload.
        current_user: Authenticated user; their ID is stored as the owner.

    Returns:
        The stored note as a ``NoteResponse``. ``video_duration`` and
        ``language`` are returned as fixed values (``None`` / ``"en"``); they
        are not written to the document.

    Raises:
        HTTPException: 500 when ``get_firebase_db()`` returns ``None``.
    """
    # Imported here so the name is guaranteed to be in scope for utcnow()
    # below regardless of the module's import list.
    from datetime import datetime

    # Check for the database first so the categorization call is not spent
    # on a request that cannot be stored anyway.
    db = get_firebase_db()
    if db is None:
        raise HTTPException(status_code=500, detail="Firebase not configured")

    # Automatically categorize the note from its summary text.
    category = await categorizer.categorize_text(note_data.summary_text)

    note_dict = {
        "video_url": str(note_data.video_url),
        "video_title": note_data.video_title,
        "summary_content": note_data.summary_text,
        "user_id": current_user.id,
        "category": category,
        "created_at": datetime.utcnow(),
    }

    # add() returns a pair; the second element is the new document reference.
    _, new_note_ref = db.collection("notes").add(note_dict)

    return NoteResponse(
        id=new_note_ref.id,
        video_url=note_dict["video_url"],
        video_title=note_dict["video_title"],
        summary_text=note_dict["summary_content"],
        video_duration=None,
        language="en",
        user_id=note_dict["user_id"],
        category=note_dict["category"],
        created_at=str(note_dict["created_at"]),
    )
 
1
+ import uuid
2
+ import re
3
+ import logging
 
 
 
 
4
  import os
5
+ from datetime import datetime
6
+ from typing import List, Optional, Dict
7
+ from pathlib import Path
8
 
9
+ from fastapi import APIRouter, Depends, HTTPException, status, Query, BackgroundTasks
 
10
  from pydantic import BaseModel, HttpUrl, Field
 
11
 
12
  from src.db.firebase import get_firebase_db
13
  from src.db.models import User, Note
14
  from src.auth.dependencies import get_current_user
 
15
  from src.utils.logger import setup_logger
16
  from src.utils.config import settings
17
 
18
+ # --- Import the new processing tools ---
19
+ from src.api.downloader import YouTubeDownloader
20
+ from src.transcription.transcript_fetcher import TranscriptFetcher
21
+ from src.transcription.whisper_transcriber import WhisperTranscriber
22
+ from src.summarization.note_generator import NoteGenerator
23
 
24
+ logger = setup_logger(__name__)
25
  router = APIRouter(prefix="/notes", tags=["Notes"])
26
 
27
+ # Temporary in-memory task store
28
+ tasks: Dict[str, Dict] = {}
29
+
30
+ # --- Models ---
31
class GenerateNotesRequest(BaseModel):
    """Request body for starting note generation from a YouTube video."""

    # URL of the video; validated by pydantic's HttpUrl type.
    youtube_url: HttpUrl = Field(default=...)
    # Language code for the video; "en" when omitted.
    language: str = Field(default="en")
34
+
35
class TaskResponse(BaseModel):
    """Response returned when a generation task has been registered."""

    # Identifier to use when polling for the task's status.
    task_id: str = Field(...)
    # Current state label of the task (e.g. "pending").
    status: str = Field(...)
    # Human-readable message accompanying the status.
    message: str = Field(...)
39
 
 
class GeneratedNoteFile(BaseModel):
    """Metadata describing one generated markdown note file on disk."""

    # Name of the file (the listing endpoint fills this from Path.name).
    filename: str = Field(...)
    # Display title for the note.
    title: str = Field(...)
    # Timestamp as a float; the listing endpoint fills it from st_mtime.
    created_at: float = Field(...)
    # File size; the listing endpoint fills it from st_size.
    size: int = Field(...)
45
 
46
+ # ==========================================
47
+ # 🚀 Note generation engine (Generate Engine)
48
+ # ==========================================
49
 
50
@router.post("/generate", response_model=TaskResponse)
async def generate_note(
    request: GenerateNotesRequest,
    background_tasks: BackgroundTasks,
    current_user: User = Depends(get_current_user),
):
    """Start generating notes from a YouTube video.

    Registers a new record in the in-memory ``tasks`` store, schedules
    ``process_video_task`` as a background task, and returns the new task's
    ID without waiting for the processing to run.
    """
    new_task_id = str(uuid.uuid4())
    owner_id = current_user.id
    video_url = str(request.youtube_url)

    task_record = {
        "status": "pending",
        "message": "Initializing process...",
        "youtube_url": video_url,
        "user_id": owner_id,
        "created_at": datetime.now(),
    }
    tasks[new_task_id] = task_record

    # Hand the actual work to the background task runner.
    background_tasks.add_task(
        process_video_task,
        new_task_id,
        video_url,
        request.language,
        owner_id,
    )

    response = TaskResponse(
        task_id=new_task_id,
        status="pending",
        message="Generation started successfully.",
    )
    return response
 
 
 
 
 
82
 
83
@router.get("/status/{task_id}")
async def get_task_status(task_id: str):
    """Report the state of a task (pending, downloading, transcribing, etc.).

    Returns the task's record from the in-memory ``tasks`` store as-is.

    Raises:
        HTTPException: 404 when no record exists for ``task_id``.
    """
    # NOTE(review): this endpoint has no auth dependency or ownership check,
    # while task records hold a user_id and (once completed) the generated
    # notes - confirm that this is intended.
    try:
        return tasks[task_id]
    except KeyError:
        raise HTTPException(status_code=404, detail="Task not found") from None
89
 
90
  # ==========================================
91
+ # 🛠️ Processing functions (Background Logic)
92
  # ==========================================
93
 
94
async def process_video_task(task_id: str, youtube_url: str, language: str, user_id: str):
    """Generate notes for a YouTube video and record progress in ``tasks``.

    Steps, each reflected in the task record's ``"status"``:

    1. ``fetching_transcript`` - try to fetch an existing transcript.
    2. ``downloading`` / ``transcribing`` - when no transcript was obtained,
       download the audio and transcribe it with Whisper.
    3. ``generating_notes`` - build the summary and action items and format
       them as markdown.
    4. Store the note in the Firestore ``notes`` collection when a database
       is available, then mark the task ``completed`` and attach the
       markdown under ``"notes"``.

    The transcript, download, transcription, generation and Firestore calls
    are synchronous, so they are run in the event loop's default executor;
    calling them directly inside this coroutine would block the loop (and
    every other request, including status polling) until they return.

    Args:
        task_id: Key of the task record in ``tasks``.
        youtube_url: URL of the video to process.
        language: Preferred transcript / transcription language code.
        user_id: ID stored as the owner of the resulting note.

    Returns:
        None. Failures are not raised: they are logged and recorded on the
        task record (``status="failed"``, ``message=str(error)``).
    """
    import asyncio
    from functools import partial

    loop = asyncio.get_running_loop()

    async def run_blocking(func, *args, **kwargs):
        """Run a synchronous callable in the default executor."""
        return await loop.run_in_executor(None, partial(func, *args, **kwargs))

    # generate_note creates the record before scheduling this task;
    # setdefault only guards against being called with an unknown ID.
    task = tasks.setdefault(task_id, {})

    audio_file = None
    downloader = None

    try:
        # Constructed inside the try block so that a failure here marks the
        # task as failed instead of leaving it "pending" forever.
        downloader = YouTubeDownloader()

        # Extract the video ID; fall back to the task ID when the URL does
        # not match.
        video_id_match = re.search(r"(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})", youtube_url)
        video_id = video_id_match.group(1) if video_id_match else task_id
        video_title = "YouTube Video"

        # 1. Try to fetch an existing transcript.
        task["status"] = "fetching_transcript"
        fetcher = TranscriptFetcher()
        try:
            transcript_text = await run_blocking(
                fetcher.fetch_transcript,
                youtube_url,
                languages=[language, "ar", "en"],
            )
        except Exception as exc:
            # Best effort: any fetch error falls through to the audio path.
            # "except Exception" (not a bare except) lets task cancellation
            # propagate.
            logger.warning(
                f"Transcript fetch failed for {video_id}, falling back to audio: {exc}"
            )
            transcript_text = None

        # 2. No transcript available: download the audio and use Whisper.
        if not transcript_text:
            task["status"] = "downloading"
            logger.info(f"Downloading audio with POT Solver for: {video_id}")
            audio_file = await run_blocking(
                downloader.download_audio, youtube_url, video_id
            )

            if not audio_file:
                raise Exception("Failed to download audio after multiple attempts.")

            task["status"] = "transcribing"
            transcriber = WhisperTranscriber()
            transcript_data = await run_blocking(
                transcriber.transcribe, str(audio_file), language=language
            )
            transcript_text = transcript_data["text"]

        # 3. Generate the notes with the AI note generator.
        task["status"] = "generating_notes"
        note_gen = NoteGenerator()
        summary_json = await run_blocking(
            note_gen.generateSummary, transcript_text, video_title
        )
        action_items = await run_blocking(
            note_gen.extractActionItems, transcript_text, video_title
        )

        combined_notes = {
            **summary_json,
            "action_items": action_items.get("action_items", []),
        }

        final_markdown = note_gen.format_final_notes(
            note_gen.format_notes_to_markdown(combined_notes),
            video_title,
            youtube_url,
            0,  # duration placeholder
        )

        # 4. Save to Firebase when it is configured.
        db = get_firebase_db()
        if db is not None:
            note_document = {
                "user_id": user_id,
                "video_url": youtube_url,
                "video_title": video_title,
                "summary_content": final_markdown,
                "created_at": datetime.utcnow(),
            }
            await run_blocking(db.collection("notes").add, note_document)
        else:
            logger.warning(
                f"Firebase not configured; notes for task {task_id} were not persisted"
            )

        task["status"] = "completed"
        task["notes"] = final_markdown
        # Replace the initial "Initializing process..." text, which would
        # otherwise still be reported for a finished task.
        task["message"] = "Notes generated successfully."
        logger.info(f"✅ Task {task_id} finished!")

    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"❌ Task {task_id} failed: {e}")
        task["status"] = "failed"
        task["message"] = str(e)
    finally:
        # Remove the downloaded audio whether or not processing succeeded.
        if audio_file and downloader:
            downloader.cleanup(audio_file)
166
+
167
+ # ==========================================
168
+ # 📂 File management
169
+ # ==========================================
170
 
@router.get("/generated", response_model=List[GeneratedNoteFile])
async def list_generated_notes():
    """List the ``*_notes.md`` files found in ``settings.output_dir``.

    The title of each entry is the file name with the trailing
    ``_notes.md`` removed (or the full file name when nothing is left).

    Returns:
        A list of ``GeneratedNoteFile`` sorted by modification time, newest
        first. An empty list when the output directory does not exist.
    """
    output_dir = settings.output_dir
    if not output_dir.exists():
        return []

    suffix = "_notes.md"
    notes = []
    for file_path in output_dir.glob("*_notes.md"):
        try:
            stats = file_path.stat()
        except OSError as exc:
            # The file can disappear between glob() and stat(); skip it
            # instead of failing the whole listing.
            logger.error(f"Error reading file {file_path}: {exc}")
            continue

        name = file_path.name
        # Every match ends with the suffix, so cut it off the end only;
        # str.replace would also remove occurrences elsewhere in the name.
        title = name[: -len(suffix)] or name

        notes.append(
            GeneratedNoteFile(
                filename=name,
                title=title,
                created_at=stats.st_mtime,
                size=stats.st_size,
            )
        )

    # Newest first.
    notes.sort(key=lambda x: x.created_at, reverse=True)
    return notes
187
 
 
@router.get("/generated/{filename}")
async def get_note_content(filename: str):
    """Return the text of one file from ``settings.output_dir``.

    Args:
        filename: Bare file name of the note. Names containing path
            separators, or equal to ``.`` / ``..``, are rejected.

    Returns:
        A dict with the file's text under ``"content"``.

    Raises:
        HTTPException: 400 when the name is not a bare file name or resolves
            outside the output directory, 404 when no regular file with
            that name exists.
    """
    # The name comes straight from the URL, so refuse anything that could
    # address a location other than a direct child of the output directory.
    # Backslashes are rejected too because they are separators on Windows.
    if filename in ("", ".", "..") or "/" in filename or "\\" in filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    base_dir = Path(settings.output_dir).resolve()
    file_path = (base_dir / filename).resolve()

    # Second line of defence after resolving: covers forms the string check
    # cannot see, such as symlinks or drive-qualified names.
    if base_dir not in file_path.parents:
        raise HTTPException(status_code=400, detail="Invalid filename")

    # is_file() rather than exists(): a directory would otherwise pass the
    # check and make read_text() fail with a server error.
    if not file_path.is_file():
        raise HTTPException(status_code=404, detail="File not found")

    return {"content": file_path.read_text(encoding="utf-8")}