bichnhan2701 committed on
Commit
7402e0f
·
1 Parent(s): 2432a11

Update note services logic

Browse files
.gitignore CHANGED
@@ -1,4 +1,7 @@
1
- NOTE_SERVICE_FLOW.md
2
  .myvenv
3
  __pycache__/
4
- *.pyc
 
 
 
 
 
 
1
  .myvenv
2
  __pycache__/
3
+ *.pyc
4
+ *.json
5
+ .env
6
+ *.txt
7
+ docs/
Dockerfile CHANGED
@@ -1,10 +1,14 @@
1
  FROM python:3.11-slim
2
  WORKDIR /app
3
 
4
- # install dependencies
5
- COPY requirements.txt ./
6
  RUN pip install --no-cache-dir -r requirements.txt
7
 
8
  COPY . .
9
 
 
 
 
 
 
10
  CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
  FROM python:3.11-slim
2
  WORKDIR /app
3
 
4
+ COPY requirements.txt .
 
5
  RUN pip install --no-cache-dir -r requirements.txt
6
 
7
  COPY . .
8
 
9
+ EXPOSE 7860
10
+
11
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=10s \
12
+ CMD curl -f http://localhost:7860/health || exit 1
13
+
14
  CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/api/folders/folders_create.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from pydantic import BaseModel
3
+ from app.services.folder_store import create_folder
4
+ from app.utils.id import new_id
5
+ from app.utils.time import now_ts
6
+
7
+
8
+ router = APIRouter(prefix="/folders", tags=["folders"])
9
+
10
+
11
+ class CreateFolderRequest(BaseModel):
12
+ name: str
13
+ color_hex: str | None = None
14
+
15
+
16
+ @router.post("")
17
+ def create(req: CreateFolderRequest):
18
+ folder = {
19
+ "folder_id": new_id(),
20
+ "name": req.name,
21
+ "color_hex": req.color_hex,
22
+ "created_at": now_ts(),
23
+ "updated_at": now_ts(),
24
+ }
25
+ create_folder(folder)
26
+ return folder
app/api/folders/folders_delete.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from app.services.folder_store import get_folder, delete_folder
3
+
4
+ router = APIRouter(prefix="/folders", tags=["folders"])
5
+
6
+
7
+ @router.delete("/{folder_id}")
8
+ def delete_folder_api(folder_id: str):
9
+ folder = get_folder(folder_id)
10
+ if not folder:
11
+ raise HTTPException(status_code=404, detail="Folder not found")
12
+
13
+ delete_folder(folder_id)
14
+ return {"folder_id": folder_id, "deleted": True}
app/api/folders/folders_get.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from app.services.folder_store import get_folder, list_folders
3
+
4
+ router = APIRouter(prefix="/folders", tags=["folders"])
5
+
6
+
7
+ @router.get("")
8
+ def get_folders():
9
+ return list_folders()
10
+
11
+
12
+ @router.get("/{folder_id}")
13
+ def get_folder_by_id(folder_id: str):
14
+ folder = get_folder(folder_id)
15
+ if not folder:
16
+ raise HTTPException(status_code=404, detail="Folder not found")
17
+ return folder
app/api/folders/folders_update.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from pydantic import BaseModel
3
+ from typing import Optional
4
+ from app.services.folder_store import get_folder, update_folder
5
+ from app.utils.time import now_ts
6
+
7
+ router = APIRouter(prefix="/folders", tags=["folders"])
8
+
9
+
10
+ class UpdateFolderRequest(BaseModel):
11
+ name: Optional[str] = None
12
+ color_hex: Optional[str] = None
13
+
14
+
15
+ @router.patch("/{folder_id}")
16
+ def update_folder_api(folder_id: str, req: UpdateFolderRequest):
17
+ folder = get_folder(folder_id)
18
+ if not folder:
19
+ raise HTTPException(status_code=404, detail="Folder not found")
20
+
21
+ updates = req.dict(exclude_unset=True)
22
+ updates["updated_at"] = now_ts()
23
+
24
+ update_folder(folder_id, updates)
25
+ return {"folder_id": folder_id, "updated": True}
app/api/notes.py DELETED
@@ -1,35 +0,0 @@
1
- from fastapi import APIRouter, BackgroundTasks, HTTPException
2
- from pydantic import BaseModel
3
- from typing import Optional, List
4
- from app.services.storage import create_note as storage_create_note, get_note
5
- from app.jobs.enrichment_job import run_enrichment
6
-
7
- router = APIRouter(prefix="/notes")
8
-
9
- class CreateNoteRequest(BaseModel):
10
- note_id: str
11
- raw_text: str
12
- normalized_text: Optional[str] = None
13
- keywords: List[str] = []
14
- chunks: list = []
15
- duration: Optional[float] = None
16
- sample_rate: Optional[int] = None
17
- asr_model: Optional[str] = None
18
- normalization_model: Optional[str] = None
19
- generate: List[str] = []
20
-
21
- @router.post("")
22
- async def create_note(req: CreateNoteRequest, bg: BackgroundTasks):
23
- storage_create_note(req.note_id, req.dict())
24
-
25
- if req.generate:
26
- bg.add_task(run_enrichment, req.note_id, req.generate)
27
-
28
- return {"note_id": req.note_id, "status": "stored"}
29
-
30
- @router.get("/{note_id}")
31
- def fetch_note(note_id: str):
32
- note = get_note(note_id)
33
- if not note:
34
- raise HTTPException(404, "Note not found")
35
- return note
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/api/notes/notes_audio.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, BackgroundTasks, HTTPException
2
+ from pydantic import BaseModel
3
+ from typing import Dict, List, Optional
4
+ from app.services.note_store import create_note
5
+ from app.jobs.enrichment_job import enrich_note
6
+ from app.models.enums import NoteType, NoteStatus
7
+ from app.utils.time import now_ts
8
+
9
+
10
+ router = APIRouter(prefix="/internal/notes", tags=["internal"])
11
+
12
+
13
+ class CreateAudioNoteRequest(BaseModel):
14
+ note_id: str
15
+ raw_text: str
16
+ metadata: Dict
17
+ generate: List[str] = []
18
+
19
+
20
+ @router.post("/audio")
21
+ async def create_audio_note(req: CreateAudioNoteRequest, bg: BackgroundTasks):
22
+ now = now_ts()
23
+ has_enrichment = bool(req.generate)
24
+
25
+ note = {
26
+ "note_id": req.note_id,
27
+ "type": NoteType.audio,
28
+ "raw_text": req.raw_text,
29
+ "metadata": req.metadata,
30
+ "status": NoteStatus.processing if has_enrichment else NoteStatus.created,
31
+ "created_at": now,
32
+ "updated_at": now,
33
+ }
34
+
35
+ create_note(note)
36
+
37
+ if has_enrichment:
38
+ bg.add_task(enrich_note, req.note_id, req.generate)
39
+
40
+ return {
41
+ "note_id": req.note_id,
42
+ "status": note["status"],
43
+ }
app/api/notes/notes_get.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from app.services.note_store import get_note, list_notes
3
+
4
+
5
+ router = APIRouter(prefix="/notes", tags=["notes"])
6
+
7
+
8
+ @router.get("")
9
+ def get_notes(folder_id: str | None = None):
10
+ return list_notes(folder_id)
11
+
12
+
13
+ @router.get("/{note_id}")
14
+ def get_note_by_id(note_id: str):
15
+ note = get_note(note_id)
16
+ if not note:
17
+ raise HTTPException(404, "Note not found")
18
+ return note
app/api/notes/notes_regenerate.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.models.enums import NoteStatus
2
+ from app.utils.time import now_ts
3
+ from fastapi import APIRouter, BackgroundTasks, HTTPException
4
+ from pydantic import BaseModel
5
+ from typing import List
6
+ from app.services.note_store import get_note, update_note
7
+ from app.jobs.enrichment_job import enrich_note
8
+
9
+
10
+ router = APIRouter(prefix="/notes", tags=["notes"])
11
+
12
+
13
+ class RegenerateRequest(BaseModel):
14
+ generate: List[str]
15
+
16
+
17
+ @router.post("/{note_id}/regenerate")
18
+ def regenerate_note(note_id: str, req: RegenerateRequest, bg: BackgroundTasks):
19
+ note = get_note(note_id)
20
+ if not note:
21
+ raise HTTPException(404, "Note not found")
22
+
23
+ # mark processing immediately
24
+ update_note(note_id, {
25
+ "status": NoteStatus.processing,
26
+ "updated_at": now_ts(),
27
+ })
28
+
29
+ bg.add_task(enrich_note, note_id, req.generate)
30
+
31
+ return {
32
+ "note_id": note_id,
33
+ "status": NoteStatus.processing,
34
+ }
app/api/notes/notes_text.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, BackgroundTasks
2
+ from pydantic import BaseModel
3
+ from typing import List, Optional
4
+ from app.services.note_store import create_note
5
+ from app.jobs.enrichment_job import enrich_note
6
+ from app.models.enums import NoteType, NoteStatus
7
+ from app.utils.id import new_id
8
+ from app.utils.time import now_ts
9
+
10
+
11
+ router = APIRouter(prefix="/notes", tags=["notes"])
12
+
13
+
14
+ class CreateTextNoteRequest(BaseModel):
15
+ raw_text: str
16
+ folder_id: Optional[str] = None
17
+ generate: List[str] = []
18
+
19
+
20
+ @router.post("/text")
21
+ async def create_text_note(req: CreateTextNoteRequest, bg: BackgroundTasks):
22
+ note_id = new_id()
23
+ now = now_ts()
24
+
25
+ has_enrichment = bool(req.generate)
26
+
27
+ note = {
28
+ "note_id": note_id,
29
+ "type": NoteType.text,
30
+ "raw_text": req.raw_text,
31
+ "folder_id": req.folder_id,
32
+ "status": NoteStatus.processing if has_enrichment else NoteStatus.created,
33
+ "created_at": now,
34
+ "updated_at": now,
35
+ }
36
+
37
+ create_note(note)
38
+
39
+ if has_enrichment:
40
+ bg.add_task(enrich_note, note_id, req.generate)
41
+
42
+ return {
43
+ "note_id": note_id,
44
+ "status": note["status"],
45
+ }
app/api/notes/notes_update.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from pydantic import BaseModel
3
+ from typing import Optional
4
+ from app.services.note_store import get_note, update_note
5
+ from app.utils.time import now_ts
6
+
7
+
8
+ router = APIRouter(prefix="/notes", tags=["notes"])
9
+
10
+
11
+ class UpdateNoteRequest(BaseModel):
12
+ folder_id: Optional[str] = None
13
+ title: Optional[str] = None
14
+
15
+
16
+ @router.patch("/{note_id}")
17
+ def update_note_api(note_id: str, req: UpdateNoteRequest):
18
+ note = get_note(note_id)
19
+ if not note:
20
+ raise HTTPException(404, "Note not found")
21
+
22
+ updates = req.dict(exclude_unset=True)
23
+
24
+ # ❗ Không cho client sửa status trực tiếp
25
+ updates.pop("status", None)
26
+
27
+ updates["updated_at"] = now_ts()
28
+
29
+ update_note(note_id, updates)
30
+ return {"note_id": note_id, "updated": True}
app/config.py CHANGED
@@ -1,5 +1,9 @@
1
  import os
2
 
3
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")
4
- GEMINI_MODEL = os.getenv("GEMINI_MODEL", "")
5
- FIREBASE_SERVICE_ACCOUNT = os.getenv("FIREBASE_SERVICE_ACCOUNT", "")
 
 
 
 
 
1
  import os
2
 
3
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
4
+ if not GEMINI_API_KEY:
5
+ raise RuntimeError("GEMINI_API_KEY is required")
6
+
7
+ GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
8
+
9
+ FIREBASE_SERVICE_ACCOUNT = os.getenv("FIREBASE_SERVICE_ACCOUNT", "")
app/infra/auth.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Request
2
+
3
+ def get_current_user_id(request: Request) -> str | None:
4
+ """
5
+ Placeholder for auth context.
6
+ Later:
7
+ - Extract from JWT
8
+ - Or API Gateway headers
9
+ """
10
+ return request.headers.get("x-user-id")
app/{services → infra}/firebase.py RENAMED
@@ -1,13 +1,9 @@
1
  import firebase_admin
2
  from firebase_admin import credentials, firestore
3
- import json
4
- from app.config import FIREBASE_SERVICE_ACCOUNT
5
 
6
  if not firebase_admin._apps:
7
- if not FIREBASE_SERVICE_ACCOUNT:
8
- raise RuntimeError("Missing FIREBASE_SERVICE_ACCOUNT")
9
-
10
- cred = credentials.Certificate(json.loads(FIREBASE_SERVICE_ACCOUNT))
11
  firebase_admin.initialize_app(cred)
12
 
13
- db = firestore.client()
 
1
  import firebase_admin
2
  from firebase_admin import credentials, firestore
3
+ import os, json
 
4
 
5
  if not firebase_admin._apps:
6
+ cred = credentials.Certificate(json.loads(os.environ["FIREBASE_SERVICE_ACCOUNT"]))
 
 
 
7
  firebase_admin.initialize_app(cred)
8
 
9
+ db = firestore.client()
app/jobs/enrichment_job.py CHANGED
@@ -1,53 +1,54 @@
1
- # from app.services.storage import get_note, update_note
2
- # from app.services.summary_service import generate_summary
3
- # from app.services.mindmap_service import generate_mindmap
4
-
5
- # async def run_enrichment(note_id: str, tasks: list):
6
- # note = get_note(note_id)
7
- # if not note:
8
- # return
9
-
10
- # text = note.get("normalized_text") or note["raw_text"]
11
-
12
- # update_note(note_id, status="processing")
13
- # updates = {}
14
-
15
- # if "summary" in tasks:
16
- # updates["summary"] = await generate_summary(text)
17
-
18
- # if "mindmap" in tasks:
19
- # updates["mindmap"] = await generate_mindmap(text)
20
 
21
- # update_note(note_id, data=updates, status="ready")
22
 
23
- import logging
24
- from app.services.storage import get_note, update_note
25
- from app.services.summary_service import generate_summary
26
- from app.services.mindmap_service import generate_mindmap
27
 
28
- async def run_enrichment(note_id: str, tasks: list):
29
  note = get_note(note_id)
30
  if not note:
31
- logging.warning(f"[enrichment] Note not found: {note_id}")
32
  return
33
 
34
- text = note.get("normalized_text") or note["raw_text"]
35
-
36
- update_note(note_id, status="processing")
37
- updates = {}
 
 
 
 
38
 
39
  try:
40
- if "summary" in tasks:
41
- try:
42
- updates["summary"] = await generate_summary(text)
43
- except Exception as e:
44
- logging.exception(f"[enrichment] generate_summary failed for note_id={note_id}: {e}")
45
-
46
- if "mindmap" in tasks:
47
- try:
48
- updates["mindmap"] = await generate_mindmap(text)
49
- except Exception as e:
50
- logging.exception(f"[enrichment] generate_mindmap failed for note_id={note_id}: {e}")
51
- finally:
52
- # Dù thành công hay thất bại, vẫn set status=ready để client ngừng poll
53
- update_note(note_id, data=updates, status="ready")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from app.services.note_store import get_note, update_note
3
+ from app.services.enrichment.pipeline import run_pipeline
4
+ from app.models.enums import NoteStatus
5
+ from app.utils.time import now_ts
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ logger = logging.getLogger(__name__)
8
 
 
 
 
 
9
 
10
+ async def enrich_note(note_id: str, tasks: list[str]):
11
  note = get_note(note_id)
12
  if not note:
 
13
  return
14
 
15
+ # Mark as processing
16
+ update_note(
17
+ note_id,
18
+ {
19
+ "status": NoteStatus.processing,
20
+ "updated_at": now_ts(),
21
+ },
22
+ )
23
 
24
  try:
25
+ # Run NLP pipeline (mutates a copy of note)
26
+ enriched = await run_pipeline(note, tasks)
27
+
28
+ updates = {}
29
+
30
+ # Only persist known enrichment fields
31
+ for field in (
32
+ "title",
33
+ "normalized_text",
34
+ "keywords",
35
+ "summary",
36
+ "mindmap",
37
+ ):
38
+ if field in enriched:
39
+ updates[field] = enriched[field]
40
+
41
+ updates["status"] = NoteStatus.ready
42
+ updates["updated_at"] = now_ts()
43
+
44
+ update_note(note_id, updates)
45
+
46
+ except Exception:
47
+ logger.exception("Enrichment failed note_id=%s", note_id)
48
+ update_note(
49
+ note_id,
50
+ {
51
+ "status": NoteStatus.error,
52
+ "updated_at": now_ts(),
53
+ },
54
+ )
app/main.py CHANGED
@@ -1,9 +1,22 @@
1
  from fastapi import FastAPI
2
- from app.api.notes import router as notes_router
 
3
 
4
- app = FastAPI(title="Note Services API")
5
- app.include_router(notes_router)
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  @app.get("/health")
8
  def health():
9
- return {"status": "ok"}
 
1
  from fastapi import FastAPI
2
+ from app.api.notes import notes_text, notes_audio, notes_get, notes_update, notes_regenerate
3
+ from app.api.folders import folders_create, folders_get, folders_update, folders_delete
4
 
5
+ app = FastAPI(title="Note Service API")
6
+
7
+ # Notes
8
+ app.include_router(notes_text.router)
9
+ app.include_router(notes_audio.router)
10
+ app.include_router(notes_get.router)
11
+ app.include_router(notes_update.router)
12
+ app.include_router(notes_regenerate.router)
13
+
14
+ # Folders
15
+ app.include_router(folders_create.router)
16
+ app.include_router(folders_get.router)
17
+ app.include_router(folders_update.router)
18
+ app.include_router(folders_delete.router)
19
 
20
  @app.get("/health")
21
  def health():
22
+ return {"status": "ok"}
app/models/enums.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+
3
+ class NoteType(str, Enum):
4
+ audio = "audio"
5
+ text = "text"
6
+
7
+ class NoteStatus(str, Enum):
8
+ created = "created"
9
+ processing = "processing"
10
+ ready = "ready"
11
+ error = "error"
app/models/folder.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
+ class Folder(BaseModel):
4
+ folder_id: str
5
+ name: str
6
+ color_hex: str | None = None
7
+ user_id: str | None = None
8
+ created_at: int
9
+ updated_at: int
app/models/note.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from typing import Optional, List, Dict
3
+ from app.models.enums import NoteType, NoteStatus
4
+
5
+ class AudioMetadata(BaseModel):
6
+ duration: Optional[float] = None
7
+ chunks: Optional[list] = None
8
+ sample_rate: Optional[int] = None
9
+ asr_model: Optional[str] = None
10
+
11
+ class NoteMetadata(BaseModel):
12
+ audio: Optional[AudioMetadata] = None
13
+ client: Optional[Dict] = None
14
+
15
+ class Note(BaseModel):
16
+ note_id: str
17
+ type: NoteType
18
+
19
+ title: Optional[str] = None
20
+ raw_text: Optional[str] = None
21
+ normalized_text: Optional[str] = None
22
+ keywords: Optional[List[str]] = None
23
+ summary: Optional[str] = None
24
+ mindmap: Optional[Dict] = None
25
+
26
+ folder_id: Optional[str] = None
27
+ metadata: Optional[NoteMetadata] = None
28
+
29
+ status: NoteStatus
30
+ created_at: int
31
+ updated_at: int
app/services/{mindmap_service.py → enrichment/mindmap.py} RENAMED
@@ -1,11 +1,11 @@
1
  import asyncio
2
- import json
3
- import logging
4
- import random
5
  import re
6
- import time
7
-
8
- from app.config import GEMINI_API_KEY
 
 
 
9
 
10
  try:
11
  import google.genai as genai
@@ -23,8 +23,7 @@ try:
23
  except Exception:
24
  GoogleAPIError = Exception
25
 
26
- _MINDMAP_MODEL = "gemini-2.5-flash"
27
- _gemini_client = None
28
 
29
  if not genai:
30
  logging.warning("[mindmap_service] google.genai not available, mindmap generation will be disabled")
@@ -32,23 +31,18 @@ elif not GEMINI_API_KEY:
32
  logging.warning("[mindmap_service] GEMINI_API_KEY is not set, mindmap generation will be disabled")
33
  else:
34
  try:
35
- _gemini_client = genai.Client(api_key=GEMINI_API_KEY)
36
- logging.info(f"[mindmap_service] Initialized google.genai client with model={_MINDMAP_MODEL}")
37
  except Exception as e:
38
  logging.exception(f"[mindmap_service] Failed to init google.genai client: {e}")
39
- _gemini_client = None
40
-
41
 
42
  async def generate_mindmap(text: str) -> dict:
43
- """Sinh cấu trúc mindmap JSON từ văn bản.
44
- Fallback: trả {{}} nếu không có model hoặc lỗi.
45
- """
46
- if not _gemini_client:
47
  return {}
48
 
49
  prompt = f"""
50
  Bạn là chuyên gia tạo Sơ đồ tư duy. Hãy phân tích văn bản sau và tạo CẤU TRÚC JSON Mindmap.
51
-
52
  Yêu cầu:
53
  1. Xác định Ý chính làm Root.
54
  2. Phân tách ý phụ thành nhánh con (tối đa 3 cấp).
@@ -57,7 +51,6 @@ Yêu cầu:
57
  - Root: "#6200EE"
58
  - Các nhánh con: sử dụng một trong các màu: "#F59E2B", "#2ECF9A", "#2F9BFF"
59
  5. CHỈ TRẢ VỀ JSON, không giải thích thêm.
60
-
61
  Cấu trúc JSON bắt buộc:
62
  {{
63
  "root": {{
@@ -85,8 +78,8 @@ Văn bản:
85
  last_exc = None
86
  for attempt in range(1, MAX_RETRIES + 1):
87
  try:
88
- resp = _gemini_client.models.generate_content(
89
- model=_MINDMAP_MODEL,
90
  contents=prompt,
91
  )
92
  return resp.text or ""
@@ -160,4 +153,3 @@ Văn bản:
160
  return fallback
161
  except Exception:
162
  return {}
163
-
 
1
  import asyncio
 
 
 
2
  import re
3
+ import logging
4
+ from random import random
5
+ from time import time
6
+ import google.genai as genai
7
+ import json
8
+ from app.config import GEMINI_API_KEY, GEMINI_MODEL
9
 
10
  try:
11
  import google.genai as genai
 
23
  except Exception:
24
  GoogleAPIError = Exception
25
 
26
+ gemini_client = None
 
27
 
28
  if not genai:
29
  logging.warning("[mindmap_service] google.genai not available, mindmap generation will be disabled")
 
31
  logging.warning("[mindmap_service] GEMINI_API_KEY is not set, mindmap generation will be disabled")
32
  else:
33
  try:
34
+ gemini_client = genai.Client(api_key=GEMINI_API_KEY)
35
+ logging.info(f"[mindmap_service] Initialized google.genai client with model={GEMINI_MODEL}")
36
  except Exception as e:
37
  logging.exception(f"[mindmap_service] Failed to init google.genai client: {e}")
38
+ gemini_client = None
 
39
 
40
  async def generate_mindmap(text: str) -> dict:
41
+ if not text:
 
 
 
42
  return {}
43
 
44
  prompt = f"""
45
  Bạn là chuyên gia tạo Sơ đồ tư duy. Hãy phân tích văn bản sau và tạo CẤU TRÚC JSON Mindmap.
 
46
  Yêu cầu:
47
  1. Xác định Ý chính làm Root.
48
  2. Phân tách ý phụ thành nhánh con (tối đa 3 cấp).
 
51
  - Root: "#6200EE"
52
  - Các nhánh con: sử dụng một trong các màu: "#F59E2B", "#2ECF9A", "#2F9BFF"
53
  5. CHỈ TRẢ VỀ JSON, không giải thích thêm.
 
54
  Cấu trúc JSON bắt buộc:
55
  {{
56
  "root": {{
 
78
  last_exc = None
79
  for attempt in range(1, MAX_RETRIES + 1):
80
  try:
81
+ resp = gemini_client.models.generate_content(
82
+ model=GEMINI_MODEL,
83
  contents=prompt,
84
  )
85
  return resp.text or ""
 
153
  return fallback
154
  except Exception:
155
  return {}
 
app/services/enrichment/normalize.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import logging
4
+ import json
5
+ import re
6
+ import time
7
+ import random
8
+ from app.config import GEMINI_API_KEY, GEMINI_MODEL
9
+
10
+ try:
11
+ import google.genai as genai
12
+ try:
13
+ from google.genai import errors as genai_errors
14
+ except Exception:
15
+ genai_errors = None
16
+ except Exception:
17
+ genai = None
18
+ genai_errors = None
19
+ logging.warning("[normalize_service] google.genai module not found; normalization disabled")
20
+
21
+ try:
22
+ from google.api_core.exceptions import GoogleAPIError
23
+ except Exception:
24
+ GoogleAPIError = Exception
25
+
26
+ gemini_client = None
27
+
28
+ if not genai:
29
+ logging.warning("[normalize_service] google.genai not available, normalization will be disabled")
30
+ elif not GEMINI_API_KEY:
31
+ logging.warning("[normalize_service] GEMINI_API_KEY is not set, normalization will be disabled")
32
+ else:
33
+ try:
34
+ gemini_client = genai.Client(api_key=GEMINI_API_KEY)
35
+ logging.info(f"[normalize_service] Initialized google.genai client with model={GEMINI_MODEL}")
36
+ except Exception as e:
37
+ logging.exception(f"[normalize_service] Failed to init google.genai client: {e}")
38
+ gemini_client = None
39
+
40
+
41
+ async def normalize_text(raw_text: str) -> str:
42
+ if not raw_text:
43
+ return raw_text
44
+
45
+ prompt = f"""
46
+ Bạn là một hệ thống Xử lý Hậu kỳ NLP (NLP Post-Processing) Tiếng Việt.
47
+ Đầu vào là văn bản thô (raw transcript), có thể thiếu dấu câu và sai chính tả do nhận dạng giọng nói.
48
+
49
+ Nhiệm vụ:
50
+ - Sửa lỗi chính tả do ASR.
51
+ - Thêm dấu câu phù hợp.
52
+ - Viết hoa đúng chuẩn tiếng Việt (đầu câu, tên riêng nếu suy luận được).
53
+ - Loại bỏ các từ/cụm từ bị lặp lại vô nghĩa.
54
+ - Giữ nguyên nội dung và ý nghĩa gốc, không rút gọn, không thêm thông tin mới.
55
+
56
+ YÊU CẦU ĐẦU RA:
57
+ - Chỉ trả về văn bản đã chuẩn hóa
58
+ - KHÔNG JSON, KHÔNG giải thích, KHÔNG markdown
59
+
60
+ Văn bản đầu vào:
61
+ \"\"\"{raw_text}\"\"\"
62
+ """
63
+
64
+ loop = asyncio.get_event_loop()
65
+
66
+ MAX_RETRIES = 3
67
+ BASE_DELAY = 1.0
68
+
69
+ def call():
70
+ last_exc = None
71
+ for attempt in range(1, MAX_RETRIES + 1):
72
+ try:
73
+ if gemini_client:
74
+ resp = gemini_client.models.generate_content(
75
+ model=GEMINI_MODEL,
76
+ contents=prompt,
77
+ )
78
+ return resp.text or ""
79
+ else:
80
+ model = genai.GenerativeModel(GEMINI_MODEL) if genai else None
81
+ if model:
82
+ resp = model.generate_content(prompt)
83
+ return getattr(resp, "text", "") or ""
84
+ return ""
85
+ except Exception as e:
86
+ last_exc = e
87
+ msg = str(e)
88
+ if "503" in msg or "UNAVAILABLE" in msg:
89
+ if attempt < MAX_RETRIES:
90
+ delay = BASE_DELAY * (2 ** (attempt - 1))
91
+ delay += random.uniform(0, 0.5 * delay)
92
+ logging.warning(
93
+ f"[normalize_service] model overloaded "
94
+ f"(attempt {attempt}/{MAX_RETRIES}), retrying after {delay:.2f}s"
95
+ )
96
+ time.sleep(delay)
97
+ continue
98
+ logging.exception(
99
+ f"[normalize_service] normalize call failed on attempt {attempt}: {e}"
100
+ )
101
+ break
102
+
103
+ if last_exc:
104
+ raise last_exc
105
+ return ""
106
+
107
+ try:
108
+ raw = await loop.run_in_executor(None, call)
109
+
110
+ if raw:
111
+ text = raw.strip()
112
+
113
+ # defensive cleanup (trường hợp model vẫn lố trả markdown)
114
+ text = re.sub(r"^```.*?\n", "", text, flags=re.DOTALL)
115
+ text = re.sub(r"```$", "", text)
116
+ text = text.strip('"').strip("'").strip()
117
+
118
+ if text:
119
+ return text
120
+
121
+ except GoogleAPIError as e:
122
+ logging.error(f"[normalize_service] Gemini API error: {e}")
123
+ except Exception as e:
124
+ logging.exception(f"[normalize_service] normalize_text failed: {e}")
125
+
126
+ # ===== fallback: best-effort local normalization =====
127
+ try:
128
+ text = raw_text.strip()
129
+ text = re.sub(r"\s+", " ", text)
130
+
131
+ if text and text[-1] not in ".!?":
132
+ text += "."
133
+
134
+ def cap_sentences(s: str) -> str:
135
+ parts = re.split(r'([.!?]\s+)', s)
136
+ out = ""
137
+ for i in range(0, len(parts), 2):
138
+ sentence = parts[i].strip()
139
+ sep = parts[i + 1] if i + 1 < len(parts) else ""
140
+ if sentence:
141
+ sentence = sentence[0].upper() + sentence[1:]
142
+ out += sentence + sep
143
+ return out
144
+
145
+ return cap_sentences(text)
146
+
147
+ except Exception:
148
+ return raw_text
app/services/enrichment/pipeline.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.services.enrichment.normalize import normalize_text
2
+ from app.services.enrichment.title_keywords import extract_title_and_keywords
3
+ from app.services.enrichment.summary import generate_summary
4
+ from app.services.enrichment.mindmap import generate_mindmap
5
+
6
+ async def run_pipeline(note: dict, tasks: list[str]):
7
+ raw_text = note.get("raw_text") or ""
8
+ if not raw_text.strip():
9
+ # Nothing to process
10
+ return note
11
+
12
+ text = raw_text
13
+
14
+ # 1️⃣ Normalize
15
+ if "normalize" in tasks:
16
+ try:
17
+ text = await normalize_text(text)
18
+ note["normalized_text"] = text
19
+ except Exception:
20
+ # Fail-safe: keep raw_text
21
+ note["normalized_text"] = text
22
+
23
+ # 2️⃣ Title + Keywords (same AI call)
24
+ if "keywords" in tasks:
25
+ try:
26
+ title, keywords = await extract_title_and_keywords(text)
27
+ if title:
28
+ note["title"] = title
29
+ if keywords:
30
+ note["keywords"] = keywords
31
+ except Exception:
32
+ # Fail-safe: skip title & keywords
33
+ pass
34
+
35
+ # 3️⃣ Summary
36
+ if "summary" in tasks:
37
+ try:
38
+ note["summary"] = await generate_summary(text)
39
+ except Exception:
40
+ pass
41
+
42
+ # 4️⃣ Mindmap
43
+ if "mindmap" in tasks:
44
+ try:
45
+ note["mindmap"] = await generate_mindmap(text)
46
+ except Exception:
47
+ pass
48
+
49
+ return note
app/services/{summary_service.py → enrichment/summary.py} RENAMED
@@ -1,10 +1,11 @@
1
  import asyncio
 
2
  import logging
3
  import random
4
- import re
5
  import time
 
6
 
7
- from app.config import GEMINI_API_KEY
8
 
9
  try:
10
  import google.genai as genai
@@ -15,45 +16,41 @@ try:
15
  except Exception:
16
  genai = None
17
  genai_errors = None
18
- logging.warning("[summary_service] google.genai module not found; summaries disabled")
19
 
20
  try:
21
  from google.api_core.exceptions import GoogleAPIError
22
  except Exception:
23
  GoogleAPIError = Exception
24
 
25
- _SUMMARY_MODEL = "gemini-2.5-flash"
26
- _gemini_client = None
27
 
28
  if not genai:
29
- logging.warning("[summary_service] google.genai not available, summary will be empty")
30
  elif not GEMINI_API_KEY:
31
- logging.warning("[summary_service] GEMINI_API_KEY is not set, summary will be empty")
32
  else:
33
  try:
34
- _gemini_client = genai.Client(api_key=GEMINI_API_KEY)
35
- logging.info(f"[summary_service] Initialized google.genai client with model={_SUMMARY_MODEL}")
36
  except Exception as e:
37
  logging.exception(f"[summary_service] Failed to init google.genai client: {e}")
38
- _gemini_client = None
39
-
40
 
41
  async def generate_summary(text: str) -> str:
42
- """Tạo tóm tắt ngắn gọn 3-5 câu, một đoạn văn duy nhất.
43
- Fallback: trả "" nếu không có model hoặc lỗi.
44
- """
45
- if not _gemini_client:
46
  return ""
47
 
48
  prompt = f"""
49
  Bạn là chuyên gia tóm tắt. Hãy tóm tắt văn bản sau thành một đoạn văn duy nhất.
50
-
51
  Yêu cầu:
52
  1. Viết khoảng 3-5 câu, tổng hợp đầy đủ chủ đề và các ý chính.
53
  2. Viết liền mạch, KHÔNG xuống dòng, KHÔNG dùng gạch đầu dòng hay đánh số.
54
  3. Chỉ dựa trên thông tin được cung cấp, tuyệt đối KHÔNG tự thêm thông tin bên ngoài.
55
  4. Trả về VĂN BẢN THUẦN (plain text), không bọc trong ``` hoặc JSON.
56
-
57
  Văn bản:
58
  \"\"\"{text}\"\"\"
59
  """
@@ -67,8 +64,8 @@ Văn bản:
67
  last_exc = None
68
  for attempt in range(1, MAX_RETRIES + 1):
69
  try:
70
- resp = _gemini_client.models.generate_content(
71
- model=_SUMMARY_MODEL,
72
  contents=prompt,
73
  )
74
  return (resp.text or "").strip()
@@ -121,4 +118,4 @@ Văn bản:
121
  logging.info("[summary_service] Returning fallback summary after errors")
122
  return fallback
123
  except Exception:
124
- return ""
 
1
  import asyncio
2
+ import os
3
  import logging
4
  import random
 
5
  import time
6
+ import re
7
 
8
+ from app.config import GEMINI_API_KEY, GEMINI_MODEL
9
 
10
  try:
11
  import google.genai as genai
 
16
  except Exception:
17
  genai = None
18
  genai_errors = None
19
+ logging.warning("[summary_service] google.genai module not found; summary generation disabled")
20
 
21
  try:
22
  from google.api_core.exceptions import GoogleAPIError
23
  except Exception:
24
  GoogleAPIError = Exception
25
 
26
+ gemini_client = None
 
27
 
28
  if not genai:
29
+ logging.warning("[summary_service] google.genai not available, summary generation will be disabled")
30
  elif not GEMINI_API_KEY:
31
+ logging.warning("[summary_service] GEMINI_API_KEY is not set, summary generation will be disabled")
32
  else:
33
  try:
34
+ gemini_client = genai.Client(api_key=GEMINI_API_KEY)
35
+ logging.info(f"[summary_service] Initialized google.genai client with model={GEMINI_MODEL}")
36
  except Exception as e:
37
  logging.exception(f"[summary_service] Failed to init google.genai client: {e}")
38
+ gemini_client = None
 
39
 
40
  async def generate_summary(text: str) -> str:
41
+ if not gemini_client:
42
+ return ""
43
+
44
+ if not text:
45
  return ""
46
 
47
  prompt = f"""
48
  Bạn là chuyên gia tóm tắt. Hãy tóm tắt văn bản sau thành một đoạn văn duy nhất.
 
49
  Yêu cầu:
50
  1. Viết khoảng 3-5 câu, tổng hợp đầy đủ chủ đề và các ý chính.
51
  2. Viết liền mạch, KHÔNG xuống dòng, KHÔNG dùng gạch đầu dòng hay đánh số.
52
  3. Chỉ dựa trên thông tin được cung cấp, tuyệt đối KHÔNG tự thêm thông tin bên ngoài.
53
  4. Trả về VĂN BẢN THUẦN (plain text), không bọc trong ``` hoặc JSON.
 
54
  Văn bản:
55
  \"\"\"{text}\"\"\"
56
  """
 
64
  last_exc = None
65
  for attempt in range(1, MAX_RETRIES + 1):
66
  try:
67
+ resp = gemini_client.models.generate_content(
68
+ model=GEMINI_MODEL,
69
  contents=prompt,
70
  )
71
  return (resp.text or "").strip()
 
118
  logging.info("[summary_service] Returning fallback summary after errors")
119
  return fallback
120
  except Exception:
121
+ return ""
app/services/enrichment/title_keywords.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import json
4
+ import asyncio
5
+ import time
6
+ import random
7
+ import re
8
+ from app.config import GEMINI_API_KEY, GEMINI_MODEL
9
+
10
# google.genai is treated as an optional dependency: when it cannot be
# imported the module still loads and keyword extraction is simply disabled.
# The "not available" warning is emitted exactly once, by the availability
# check further down, instead of once here and once there.
try:
    import google.genai as genai
except Exception:
    genai = None
    genai_errors = None
else:
    try:
        from google.genai import errors as genai_errors
    except Exception:
        genai_errors = None

# GoogleAPIError is only used in an `except` clause; fall back to the base
# Exception class when google.api_core is not importable.
try:
    from google.api_core.exceptions import GoogleAPIError
except Exception:
    GoogleAPIError = Exception

# Shared client; stays None whenever extraction has to be disabled.
gemini_client = None

if not genai:
    logging.warning("[keywords_service] google.genai not available, keyword extraction will be disabled")
elif not GEMINI_API_KEY:
    logging.warning("[keywords_service] GEMINI_API_KEY is not set, keyword extraction will be disabled")
else:
    try:
        gemini_client = genai.Client(api_key=GEMINI_API_KEY)
        logging.info("[keywords_service] Initialized google.genai client with model=%s", GEMINI_MODEL)
    except Exception as e:
        # Client construction problems must not break module import.
        logging.exception("[keywords_service] Failed to init google.genai client: %s", e)
        gemini_client = None
39
+
40
+
41
+ async def extract_title_and_keywords(text: str) -> tuple[str | None, list[str]]:
42
+ if not text or not text.strip():
43
+ return None, []
44
+
45
+ if not gemini_client and not genai:
46
+ # AI not available β†’ safe fallback
47
+ return None, []
48
+
49
+ prompt = f"""
50
+ BαΊ‘n lΓ  mα»™t hệ thα»‘ng Xα»­ lΓ½ HαΊ­u kα»³ NLP (NLP Post-Processing) TiαΊΏng Việt.
51
+
52
+ Nhiệm vα»₯:
53
+ 1. Sinh **tiΓͺu đề (title)** ngαΊ―n gọn phαΊ£n Γ‘nh Δ‘ΓΊng chα»§ đề chΓ­nh cα»§a vΔƒn bαΊ£n:
54
+ - Độ dΓ i tα»‘i Δ‘a **10 tα»«**
55
+ - Mang tΓ­nh mΓ΄ tαΊ£, trung tΓ­nh, phΓΉ hợp lΓ m tiΓͺu đề ghi chΓΊ (note)
56
+ - KHΓ”NG giαΊ­t tΓ­t, KHΓ”NG suy diα»…n quΓ‘ mα»©c
57
+
58
+ 2. RΓΊt trΓ­ch cΓ‘c **tα»« khΓ³a quan trọng** phαΊ£n Γ‘nh Δ‘ΓΊng **chα»§ đề vΓ  nα»™i dung chΓ­nh** cα»§a vΔƒn bαΊ£n.
59
+ - Mα»—i tα»« khΓ³a dΓ i tα»« **1–4 tα»«**.
60
+ - Ζ―u tiΓͺn danh tα»«, cα»₯m danh tα»«, thuαΊ­t ngα»―, khΓ‘i niệm chΓ­nh.
61
+ - LoαΊ‘i bỏ tα»« chung chung, tα»« đệm, tα»« cαΊ£m thΓ‘n, tα»« lαΊ·p nghΔ©a.
62
+ - KHÔNG diễn giải, KHÔNG tóm tắt, KHÔNG chuẩn hóa lẑi văn bản.
63
+ - KHΓ”NG tαΊ‘o tα»« khΓ³a khΓ΄ng xuαΊ₯t hiện hoαΊ·c khΓ΄ng suy luαΊ­n hợp lΓ½ tα»« vΔƒn bαΊ£n.
64
+
65
+ Quy tαΊ―c:
66
+ - Sα»‘ lượng tα»« khΓ³a: 3–10 (tΓΉy Δ‘α»™ dΓ i vΓ  nα»™i dung vΔƒn bαΊ£n).
67
+ - Giα»― nguyΓͺn chα»― thường/hoa theo cΓ‘ch viαΊΏt phα»• biαΊΏn.
68
+ - KHΓ”NG trΓΉng lαΊ·p tα»« khΓ³a.
69
+ - KHΓ”NG sαΊ―p xαΊΏp theo bαΊ£ng chα»― cΓ‘i; Ζ°u tiΓͺn theo mα»©c Δ‘α»™ quan trọng.
70
+
71
+ Văn bản đầu vào:
72
+ \"\"\"{text}\"\"\"
73
+
74
+ YÊU CẦU ĐẦU RA:
75
+ - Chỉ trαΊ£ về **JSON hợp lệ**
76
+ - KHΓ”NG giαΊ£i thΓ­ch
77
+ - KHΓ”NG markdown
78
+ - KHΓ”NG thΓͺm trường khΓ‘c ngoΓ i schema dΖ°α»›i Δ‘Γ’y
79
+
80
+ CαΊ₯u trΓΊc JSON bαΊ―t buα»™c:
81
+ {{
82
+ "title": "TiΓͺu đề ngαΊ―n gọn",
83
+ "keywords": ["Tα»« khΓ³a 1", "Tα»« khΓ³a 2", "..."]
84
+ }}
85
+ """
86
+
87
+ loop = asyncio.get_event_loop()
88
+
89
+ MAX_RETRIES = 3
90
+ BASE_DELAY = 1.0
91
+
92
+ def call():
93
+ last_exc = None
94
+ for attempt in range(1, MAX_RETRIES + 1):
95
+ try:
96
+ if gemini_client:
97
+ resp = gemini_client.models.generate_content(
98
+ model=GEMINI_MODEL,
99
+ contents=prompt,
100
+ )
101
+ return getattr(resp, "text", "") or ""
102
+ else:
103
+ model = genai.GenerativeModel(GEMINI_MODEL) if genai else None
104
+ if model:
105
+ resp = model.generate_content(prompt)
106
+ return getattr(resp, "text", "") or ""
107
+ return ""
108
+ except Exception as e:
109
+ last_exc = e
110
+ is_server_error = False
111
+ try:
112
+ if genai_errors and isinstance(e, genai_errors.ServerError):
113
+ is_server_error = True
114
+ except Exception:
115
+ pass
116
+
117
+ msg = str(e)
118
+ if "503" in msg or "UNAVAILABLE" in msg or is_server_error:
119
+ if attempt < MAX_RETRIES:
120
+ delay = BASE_DELAY * (2 ** (attempt - 1))
121
+ delay = delay + random.uniform(0, 0.5 * delay)
122
+ logging.warning(f"[keywords_service] model overloaded (attempt {attempt}/{MAX_RETRIES}), retrying after {delay:.2f}s")
123
+ time.sleep(delay)
124
+ continue
125
+ logging.exception(f"[keywords_service] extract_keywords call failed on attempt {attempt}: {e}")
126
+ break
127
+
128
+ if last_exc:
129
+ raise last_exc
130
+ return ""
131
+
132
+ try:
133
+ raw = await loop.run_in_executor(None, call)
134
+ title, keywords = _parse_response(raw)
135
+ return title, keywords
136
+ except GoogleAPIError as e:
137
+ logging.error("[title_keywords_service] Gemini API error: %s", e)
138
+ except Exception as e:
139
+ logging.exception("[title_keywords_service] extract failed: %s", e)
140
+
141
+ return None, []
142
+
143
+ def _parse_response(raw: str) -> tuple[str | None, list[str]]:
144
+ if not raw:
145
+ return None, []
146
+
147
+ raw = raw.strip()
148
+
149
+ # Try extracting JSON block
150
+ start = raw.find("{")
151
+ end = raw.rfind("}")
152
+
153
+ if start != -1 and end != -1 and end > start:
154
+ raw_json = raw[start : end + 1]
155
+ else:
156
+ raw_json = raw
157
+
158
+ try:
159
+ parsed = json.loads(raw_json)
160
+ except Exception as e:
161
+ logging.warning(
162
+ "[title_keywords_service] Failed to parse JSON: %s | raw=%r",
163
+ e,
164
+ raw[:300],
165
+ )
166
+ return None, []
167
+
168
+ title = parsed.get("title")
169
+ keywords = parsed.get("keywords")
170
+
171
+ # Validate title
172
+ if not isinstance(title, str) or not title.strip():
173
+ title = None
174
+ else:
175
+ title = title.strip()
176
+
177
+ # Validate keywords
178
+ if not isinstance(keywords, list):
179
+ keywords = []
180
+ else:
181
+ keywords = [
182
+ k.strip()
183
+ for k in keywords
184
+ if isinstance(k, str) and k.strip()
185
+ ]
186
+
187
+ return title, keywords
app/services/folder_store.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.infra.firebase import db
2
+
3
+ COL = "folders"
4
+
5
def create_folder(folder: dict):
    """Write *folder* to the folders collection under its ``folder_id`` key."""
    doc_ref = db.collection(COL).document(folder["folder_id"])
    doc_ref.set(folder)
7
+
8
+
9
def get_folder(folder_id: str):
    """Return the folder document as a dict, or ``None`` if it does not exist."""
    snapshot = db.collection(COL).document(folder_id).get()
    if not snapshot.exists:
        return None
    return snapshot.to_dict()
12
+
13
+
14
def list_folders():
    """Return every document in the folders collection as a list of dicts."""
    folders = []
    for snapshot in db.collection(COL).stream():
        folders.append(snapshot.to_dict())
    return folders
16
+
17
+
18
def update_folder(folder_id: str, data: dict):
    """Apply a partial update to the folder document identified by *folder_id*.

    Args:
        folder_id: Document id inside the folders collection.
        data: Field/value pairs to update. An empty or ``None`` payload is a
            no-op, matching ``update_note``.
    """
    # Skip the round trip for an empty payload; the Firestore client is
    # expected to reject an empty update rather than ignore it.
    if not data:
        return
    db.collection(COL).document(folder_id).update(data)
20
+
21
+
22
def delete_folder(folder_id: str):
    """Delete the folder document identified by *folder_id*."""
    doc_ref = db.collection(COL).document(folder_id)
    doc_ref.delete()
app/services/note_store.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.infra.firebase import db
2
+
3
+ COL = "notes"
4
+
5
def create_note(note: dict):
    """Store *note* under its ``note_id``, refusing to overwrite an existing one.

    Raises:
        ValueError: If a document with the same ``note_id`` already exists.
    """
    note_id = note["note_id"]
    doc_ref = db.collection(COL).document(note_id)
    # NOTE(review): the existence check and the write below are two separate
    # calls, so they are not atomic with respect to concurrent writers.
    already_there = doc_ref.get().exists
    if already_there:
        raise ValueError(f"Note already exists: {note['note_id']}")
    doc_ref.set(note)
10
+
11
+
12
def update_note(note_id: str, data: dict):
    """Apply a partial update to a note; an empty payload is a no-op."""
    if data:
        doc_ref = db.collection(COL).document(note_id)
        doc_ref.update(data)
16
+
17
def get_note(note_id: str):
    """Return the note document as a dict, or ``None`` if it does not exist."""
    snapshot = db.collection(COL).document(note_id).get()
    if not snapshot.exists:
        return None
    return snapshot.to_dict()
20
+
21
+
22
def list_notes(folder_id: str | None = None):
    """Return notes as a list of dicts, optionally limited to one folder.

    Args:
        folder_id: When truthy, only notes whose ``folder_id`` field equals
            this value are returned; otherwise all notes are returned.
    """
    query = db.collection(COL)
    if folder_id:
        query = query.where("folder_id", "==", folder_id)
    notes = []
    for snapshot in query.stream():
        notes.append(snapshot.to_dict())
    return notes
app/services/storage.py DELETED
@@ -1,33 +0,0 @@
1
- from datetime import datetime
2
- from app.services.firebase import db
3
-
4
- COLLECTION = "notes"
5
-
6
- import logging
7
-
8
- def create_note(note_id: str, payload: dict):
9
- now = datetime.utcnow()
10
- payload.update({
11
- "status": "created",
12
- "created_at": now,
13
- "updated_at": now
14
- })
15
- logging.info(f"[NoteService] create_note: id={note_id}, keys={list(payload.keys())}")
16
- db.collection(COLLECTION).document(note_id).set(payload)
17
- logging.info(f"[NoteService] create_note: saved id={note_id}")
18
-
19
-
20
- def update_note(note_id: str, data: dict = None, status: str = None):
21
- updates = {"updated_at": datetime.utcnow()}
22
- if data:
23
- updates.update(data)
24
- if status:
25
- updates["status"] = status
26
-
27
- db.collection(COLLECTION).document(note_id).update(updates)
28
-
29
- def get_note(note_id: str):
30
- logging.info(f"[NoteService] get_note: id={note_id}")
31
- doc = db.collection(COLLECTION).document(note_id).get()
32
- logging.info(f"[NoteService] get_note: exists={doc.exists}")
33
- return doc.to_dict() if doc.exists else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/utils/id.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ import uuid
2
+
3
def new_id() -> str:
    """Return a fresh random UUID4 as a 32-character hex string."""
    generated = uuid.uuid4()
    return generated.hex
app/utils/time.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ import time
2
+
3
def now_ts() -> int:
    """Return the current Unix time in milliseconds, truncated to an int."""
    millis = time.time() * 1000
    return int(millis)
requirements.txt CHANGED
@@ -2,3 +2,4 @@ fastapi
2
  uvicorn
3
  google-genai
4
  firebase-admin
 
 
2
  uvicorn
3
  google-genai
4
  firebase-admin
5
+ pydantic