Spaces:
Sleeping
Sleeping
Commit
·
7402e0f
1
Parent(s):
2432a11
Update note services logic
Browse files
- .gitignore +5 -2
- Dockerfile +6 -2
- app/api/folders/folders_create.py +26 -0
- app/api/folders/folders_delete.py +14 -0
- app/api/folders/folders_get.py +17 -0
- app/api/folders/folders_update.py +25 -0
- app/api/notes.py +0 -35
- app/api/notes/notes_audio.py +43 -0
- app/api/notes/notes_get.py +18 -0
- app/api/notes/notes_regenerate.py +34 -0
- app/api/notes/notes_text.py +45 -0
- app/api/notes/notes_update.py +30 -0
- app/config.py +7 -3
- app/infra/auth.py +10 -0
- app/{services → infra}/firebase.py +3 -7
- app/jobs/enrichment_job.py +45 -44
- app/main.py +17 -4
- app/models/enums.py +11 -0
- app/models/folder.py +9 -0
- app/models/note.py +31 -0
- app/services/{mindmap_service.py → enrichment/mindmap.py} +13 -21
- app/services/enrichment/normalize.py +148 -0
- app/services/enrichment/pipeline.py +49 -0
- app/services/{summary_service.py → enrichment/summary.py} +17 -20
- app/services/enrichment/title_keywords.py +187 -0
- app/services/folder_store.py +23 -0
- app/services/note_store.py +26 -0
- app/services/storage.py +0 -33
- app/utils/id.py +4 -0
- app/utils/time.py +4 -0
- requirements.txt +1 -0
.gitignore
CHANGED
|
@@ -1,4 +1,7 @@
|
|
| 1 |
-
NOTE_SERVICE_FLOW.md
|
| 2 |
.myvenv
|
| 3 |
__pycache__/
|
| 4 |
-
*.pyc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
.myvenv
|
| 2 |
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.json
|
| 5 |
+
.env
|
| 6 |
+
*.txt
|
| 7 |
+
docs/
|
Dockerfile
CHANGED
|
@@ -1,10 +1,14 @@
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
WORKDIR /app
|
| 3 |
|
| 4 |
-
|
| 5 |
-
COPY requirements.txt ./
|
| 6 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 7 |
|
| 8 |
COPY . .
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
WORKDIR /app
|
| 3 |
|
| 4 |
+
COPY requirements.txt .
|
|
|
|
| 5 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 6 |
|
| 7 |
COPY . .
|
| 8 |
|
| 9 |
+
EXPOSE 7860
|
| 10 |
+
|
| 11 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=10s \
|
| 12 |
+
CMD curl -f http://localhost:7860/health || exit 1
|
| 13 |
+
|
| 14 |
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
app/api/folders/folders_create.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from app.services.folder_store import create_folder
|
| 4 |
+
from app.utils.id import new_id
|
| 5 |
+
from app.utils.time import now_ts
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
router = APIRouter(prefix="/folders", tags=["folders"])
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class CreateFolderRequest(BaseModel):
|
| 12 |
+
name: str
|
| 13 |
+
color_hex: str | None = None
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@router.post("")
|
| 17 |
+
def create(req: CreateFolderRequest):
|
| 18 |
+
folder = {
|
| 19 |
+
"folder_id": new_id(),
|
| 20 |
+
"name": req.name,
|
| 21 |
+
"color_hex": req.color_hex,
|
| 22 |
+
"created_at": now_ts(),
|
| 23 |
+
"updated_at": now_ts(),
|
| 24 |
+
}
|
| 25 |
+
create_folder(folder)
|
| 26 |
+
return folder
|
app/api/folders/folders_delete.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
from app.services.folder_store import get_folder, delete_folder
|
| 3 |
+
|
| 4 |
+
router = APIRouter(prefix="/folders", tags=["folders"])
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@router.delete("/{folder_id}")
|
| 8 |
+
def delete_folder_api(folder_id: str):
|
| 9 |
+
folder = get_folder(folder_id)
|
| 10 |
+
if not folder:
|
| 11 |
+
raise HTTPException(status_code=404, detail="Folder not found")
|
| 12 |
+
|
| 13 |
+
delete_folder(folder_id)
|
| 14 |
+
return {"folder_id": folder_id, "deleted": True}
|
app/api/folders/folders_get.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
from app.services.folder_store import get_folder, list_folders
|
| 3 |
+
|
| 4 |
+
router = APIRouter(prefix="/folders", tags=["folders"])
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
@router.get("")
|
| 8 |
+
def get_folders():
|
| 9 |
+
return list_folders()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@router.get("/{folder_id}")
|
| 13 |
+
def get_folder_by_id(folder_id: str):
|
| 14 |
+
folder = get_folder(folder_id)
|
| 15 |
+
if not folder:
|
| 16 |
+
raise HTTPException(status_code=404, detail="Folder not found")
|
| 17 |
+
return folder
|
app/api/folders/folders_update.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from app.services.folder_store import get_folder, update_folder
|
| 5 |
+
from app.utils.time import now_ts
|
| 6 |
+
|
| 7 |
+
router = APIRouter(prefix="/folders", tags=["folders"])
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class UpdateFolderRequest(BaseModel):
|
| 11 |
+
name: Optional[str] = None
|
| 12 |
+
color_hex: Optional[str] = None
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@router.patch("/{folder_id}")
|
| 16 |
+
def update_folder_api(folder_id: str, req: UpdateFolderRequest):
|
| 17 |
+
folder = get_folder(folder_id)
|
| 18 |
+
if not folder:
|
| 19 |
+
raise HTTPException(status_code=404, detail="Folder not found")
|
| 20 |
+
|
| 21 |
+
updates = req.dict(exclude_unset=True)
|
| 22 |
+
updates["updated_at"] = now_ts()
|
| 23 |
+
|
| 24 |
+
update_folder(folder_id, updates)
|
| 25 |
+
return {"folder_id": folder_id, "updated": True}
|
app/api/notes.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
| 2 |
-
from pydantic import BaseModel
|
| 3 |
-
from typing import Optional, List
|
| 4 |
-
from app.services.storage import create_note as storage_create_note, get_note
|
| 5 |
-
from app.jobs.enrichment_job import run_enrichment
|
| 6 |
-
|
| 7 |
-
router = APIRouter(prefix="/notes")
|
| 8 |
-
|
| 9 |
-
class CreateNoteRequest(BaseModel):
|
| 10 |
-
note_id: str
|
| 11 |
-
raw_text: str
|
| 12 |
-
normalized_text: Optional[str] = None
|
| 13 |
-
keywords: List[str] = []
|
| 14 |
-
chunks: list = []
|
| 15 |
-
duration: Optional[float] = None
|
| 16 |
-
sample_rate: Optional[int] = None
|
| 17 |
-
asr_model: Optional[str] = None
|
| 18 |
-
normalization_model: Optional[str] = None
|
| 19 |
-
generate: List[str] = []
|
| 20 |
-
|
| 21 |
-
@router.post("")
|
| 22 |
-
async def create_note(req: CreateNoteRequest, bg: BackgroundTasks):
|
| 23 |
-
storage_create_note(req.note_id, req.dict())
|
| 24 |
-
|
| 25 |
-
if req.generate:
|
| 26 |
-
bg.add_task(run_enrichment, req.note_id, req.generate)
|
| 27 |
-
|
| 28 |
-
return {"note_id": req.note_id, "status": "stored"}
|
| 29 |
-
|
| 30 |
-
@router.get("/{note_id}")
|
| 31 |
-
def fetch_note(note_id: str):
|
| 32 |
-
note = get_note(note_id)
|
| 33 |
-
if not note:
|
| 34 |
-
raise HTTPException(404, "Note not found")
|
| 35 |
-
return note
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/api/notes/notes_audio.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Dict, List, Optional
|
| 4 |
+
from app.services.note_store import create_note
|
| 5 |
+
from app.jobs.enrichment_job import enrich_note
|
| 6 |
+
from app.models.enums import NoteType, NoteStatus
|
| 7 |
+
from app.utils.time import now_ts
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
router = APIRouter(prefix="/internal/notes", tags=["internal"])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class CreateAudioNoteRequest(BaseModel):
|
| 14 |
+
note_id: str
|
| 15 |
+
raw_text: str
|
| 16 |
+
metadata: Dict
|
| 17 |
+
generate: List[str] = []
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@router.post("/audio")
|
| 21 |
+
async def create_audio_note(req: CreateAudioNoteRequest, bg: BackgroundTasks):
|
| 22 |
+
now = now_ts()
|
| 23 |
+
has_enrichment = bool(req.generate)
|
| 24 |
+
|
| 25 |
+
note = {
|
| 26 |
+
"note_id": req.note_id,
|
| 27 |
+
"type": NoteType.audio,
|
| 28 |
+
"raw_text": req.raw_text,
|
| 29 |
+
"metadata": req.metadata,
|
| 30 |
+
"status": NoteStatus.processing if has_enrichment else NoteStatus.created,
|
| 31 |
+
"created_at": now,
|
| 32 |
+
"updated_at": now,
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
create_note(note)
|
| 36 |
+
|
| 37 |
+
if has_enrichment:
|
| 38 |
+
bg.add_task(enrich_note, req.note_id, req.generate)
|
| 39 |
+
|
| 40 |
+
return {
|
| 41 |
+
"note_id": req.note_id,
|
| 42 |
+
"status": note["status"],
|
| 43 |
+
}
|
app/api/notes/notes_get.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
from app.services.note_store import get_note, list_notes
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
router = APIRouter(prefix="/notes", tags=["notes"])
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@router.get("")
|
| 9 |
+
def get_notes(folder_id: str | None = None):
|
| 10 |
+
return list_notes(folder_id)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@router.get("/{note_id}")
|
| 14 |
+
def get_note_by_id(note_id: str):
|
| 15 |
+
note = get_note(note_id)
|
| 16 |
+
if not note:
|
| 17 |
+
raise HTTPException(404, "Note not found")
|
| 18 |
+
return note
|
app/api/notes/notes_regenerate.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.models.enums import NoteStatus
|
| 2 |
+
from app.utils.time import now_ts
|
| 3 |
+
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
from typing import List
|
| 6 |
+
from app.services.note_store import get_note, update_note
|
| 7 |
+
from app.jobs.enrichment_job import enrich_note
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
router = APIRouter(prefix="/notes", tags=["notes"])
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class RegenerateRequest(BaseModel):
|
| 14 |
+
generate: List[str]
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@router.post("/{note_id}/regenerate")
|
| 18 |
+
def regenerate_note(note_id: str, req: RegenerateRequest, bg: BackgroundTasks):
|
| 19 |
+
note = get_note(note_id)
|
| 20 |
+
if not note:
|
| 21 |
+
raise HTTPException(404, "Note not found")
|
| 22 |
+
|
| 23 |
+
# mark processing immediately
|
| 24 |
+
update_note(note_id, {
|
| 25 |
+
"status": NoteStatus.processing,
|
| 26 |
+
"updated_at": now_ts(),
|
| 27 |
+
})
|
| 28 |
+
|
| 29 |
+
bg.add_task(enrich_note, note_id, req.generate)
|
| 30 |
+
|
| 31 |
+
return {
|
| 32 |
+
"note_id": note_id,
|
| 33 |
+
"status": NoteStatus.processing,
|
| 34 |
+
}
|
app/api/notes/notes_text.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, BackgroundTasks
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import List, Optional
|
| 4 |
+
from app.services.note_store import create_note
|
| 5 |
+
from app.jobs.enrichment_job import enrich_note
|
| 6 |
+
from app.models.enums import NoteType, NoteStatus
|
| 7 |
+
from app.utils.id import new_id
|
| 8 |
+
from app.utils.time import now_ts
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
router = APIRouter(prefix="/notes", tags=["notes"])
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class CreateTextNoteRequest(BaseModel):
|
| 15 |
+
raw_text: str
|
| 16 |
+
folder_id: Optional[str] = None
|
| 17 |
+
generate: List[str] = []
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@router.post("/text")
|
| 21 |
+
async def create_text_note(req: CreateTextNoteRequest, bg: BackgroundTasks):
|
| 22 |
+
note_id = new_id()
|
| 23 |
+
now = now_ts()
|
| 24 |
+
|
| 25 |
+
has_enrichment = bool(req.generate)
|
| 26 |
+
|
| 27 |
+
note = {
|
| 28 |
+
"note_id": note_id,
|
| 29 |
+
"type": NoteType.text,
|
| 30 |
+
"raw_text": req.raw_text,
|
| 31 |
+
"folder_id": req.folder_id,
|
| 32 |
+
"status": NoteStatus.processing if has_enrichment else NoteStatus.created,
|
| 33 |
+
"created_at": now,
|
| 34 |
+
"updated_at": now,
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
create_note(note)
|
| 38 |
+
|
| 39 |
+
if has_enrichment:
|
| 40 |
+
bg.add_task(enrich_note, note_id, req.generate)
|
| 41 |
+
|
| 42 |
+
return {
|
| 43 |
+
"note_id": note_id,
|
| 44 |
+
"status": note["status"],
|
| 45 |
+
}
|
app/api/notes/notes_update.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, HTTPException
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
from typing import Optional
|
| 4 |
+
from app.services.note_store import get_note, update_note
|
| 5 |
+
from app.utils.time import now_ts
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
router = APIRouter(prefix="/notes", tags=["notes"])
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class UpdateNoteRequest(BaseModel):
|
| 12 |
+
folder_id: Optional[str] = None
|
| 13 |
+
title: Optional[str] = None
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@router.patch("/{note_id}")
|
| 17 |
+
def update_note_api(note_id: str, req: UpdateNoteRequest):
|
| 18 |
+
note = get_note(note_id)
|
| 19 |
+
if not note:
|
| 20 |
+
raise HTTPException(404, "Note not found")
|
| 21 |
+
|
| 22 |
+
updates = req.dict(exclude_unset=True)
|
| 23 |
+
|
| 24 |
+
# ❌ Không cho client sửa status trực tiếp
|
| 25 |
+
updates.pop("status", None)
|
| 26 |
+
|
| 27 |
+
updates["updated_at"] = now_ts()
|
| 28 |
+
|
| 29 |
+
update_note(note_id, updates)
|
| 30 |
+
return {"note_id": note_id, "updated": True}
|
app/config.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
-
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY"
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
+
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
| 4 |
+
if not GEMINI_API_KEY:
|
| 5 |
+
raise RuntimeError("GEMINI_API_KEY is required")
|
| 6 |
+
|
| 7 |
+
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
|
| 8 |
+
|
| 9 |
+
FIREBASE_SERVICE_ACCOUNT = os.getenv("FIREBASE_SERVICE_ACCOUNT", "")
|
app/infra/auth.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import Request
|
| 2 |
+
|
| 3 |
+
def get_current_user_id(request: Request) -> str | None:
|
| 4 |
+
"""
|
| 5 |
+
Placeholder for auth context.
|
| 6 |
+
Later:
|
| 7 |
+
- Extract from JWT
|
| 8 |
+
- Or API Gateway headers
|
| 9 |
+
"""
|
| 10 |
+
return request.headers.get("x-user-id")
|
app/{services → infra}/firebase.py
RENAMED
|
@@ -1,13 +1,9 @@
|
|
| 1 |
import firebase_admin
|
| 2 |
from firebase_admin import credentials, firestore
|
| 3 |
-
import json
|
| 4 |
-
from app.config import FIREBASE_SERVICE_ACCOUNT
|
| 5 |
|
| 6 |
if not firebase_admin._apps:
|
| 7 |
-
|
| 8 |
-
raise RuntimeError("Missing FIREBASE_SERVICE_ACCOUNT")
|
| 9 |
-
|
| 10 |
-
cred = credentials.Certificate(json.loads(FIREBASE_SERVICE_ACCOUNT))
|
| 11 |
firebase_admin.initialize_app(cred)
|
| 12 |
|
| 13 |
-
db = firestore.client()
|
|
|
|
| 1 |
import firebase_admin
|
| 2 |
from firebase_admin import credentials, firestore
|
| 3 |
+
import os, json
|
|
|
|
| 4 |
|
| 5 |
if not firebase_admin._apps:
|
| 6 |
+
cred = credentials.Certificate(json.loads(os.environ["FIREBASE_SERVICE_ACCOUNT"]))
|
|
|
|
|
|
|
|
|
|
| 7 |
firebase_admin.initialize_app(cred)
|
| 8 |
|
| 9 |
+
db = firestore.client()
|
app/jobs/enrichment_job.py
CHANGED
|
@@ -1,53 +1,54 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
# note = get_note(note_id)
|
| 7 |
-
# if not note:
|
| 8 |
-
# return
|
| 9 |
-
|
| 10 |
-
# text = note.get("normalized_text") or note["raw_text"]
|
| 11 |
-
|
| 12 |
-
# update_note(note_id, status="processing")
|
| 13 |
-
# updates = {}
|
| 14 |
-
|
| 15 |
-
# if "summary" in tasks:
|
| 16 |
-
# updates["summary"] = await generate_summary(text)
|
| 17 |
-
|
| 18 |
-
# if "mindmap" in tasks:
|
| 19 |
-
# updates["mindmap"] = await generate_mindmap(text)
|
| 20 |
|
| 21 |
-
|
| 22 |
|
| 23 |
-
import logging
|
| 24 |
-
from app.services.storage import get_note, update_note
|
| 25 |
-
from app.services.summary_service import generate_summary
|
| 26 |
-
from app.services.mindmap_service import generate_mindmap
|
| 27 |
|
| 28 |
-
async def
|
| 29 |
note = get_note(note_id)
|
| 30 |
if not note:
|
| 31 |
-
logging.warning(f"[enrichment] Note not found: {note_id}")
|
| 32 |
return
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
try:
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from app.services.note_store import get_note, update_note
|
| 3 |
+
from app.services.enrichment.pipeline import run_pipeline
|
| 4 |
+
from app.models.enums import NoteStatus
|
| 5 |
+
from app.utils.time import now_ts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
logger = logging.getLogger(__name__)
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
async def enrich_note(note_id: str, tasks: list[str]):
|
| 11 |
note = get_note(note_id)
|
| 12 |
if not note:
|
|
|
|
| 13 |
return
|
| 14 |
|
| 15 |
+
# Mark as processing
|
| 16 |
+
update_note(
|
| 17 |
+
note_id,
|
| 18 |
+
{
|
| 19 |
+
"status": NoteStatus.processing,
|
| 20 |
+
"updated_at": now_ts(),
|
| 21 |
+
},
|
| 22 |
+
)
|
| 23 |
|
| 24 |
try:
|
| 25 |
+
# Run NLP pipeline (mutates a copy of note)
|
| 26 |
+
enriched = await run_pipeline(note, tasks)
|
| 27 |
+
|
| 28 |
+
updates = {}
|
| 29 |
+
|
| 30 |
+
# Only persist known enrichment fields
|
| 31 |
+
for field in (
|
| 32 |
+
"title",
|
| 33 |
+
"normalized_text",
|
| 34 |
+
"keywords",
|
| 35 |
+
"summary",
|
| 36 |
+
"mindmap",
|
| 37 |
+
):
|
| 38 |
+
if field in enriched:
|
| 39 |
+
updates[field] = enriched[field]
|
| 40 |
+
|
| 41 |
+
updates["status"] = NoteStatus.ready
|
| 42 |
+
updates["updated_at"] = now_ts()
|
| 43 |
+
|
| 44 |
+
update_note(note_id, updates)
|
| 45 |
+
|
| 46 |
+
except Exception:
|
| 47 |
+
logger.exception("Enrichment failed note_id=%s", note_id)
|
| 48 |
+
update_note(
|
| 49 |
+
note_id,
|
| 50 |
+
{
|
| 51 |
+
"status": NoteStatus.error,
|
| 52 |
+
"updated_at": now_ts(),
|
| 53 |
+
},
|
| 54 |
+
)
|
app/main.py
CHANGED
|
@@ -1,9 +1,22 @@
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
-
from app.api.notes import
|
|
|
|
| 3 |
|
| 4 |
-
app = FastAPI(title="Note
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
@app.get("/health")
|
| 8 |
def health():
|
| 9 |
-
return {"status": "ok"}
|
|
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
+
from app.api.notes import notes_text, notes_audio, notes_get, notes_update, notes_regenerate
|
| 3 |
+
from app.api.folders import folders_create, folders_get, folders_update, folders_delete
|
| 4 |
|
| 5 |
+
app = FastAPI(title="Note Service API")
|
| 6 |
+
|
| 7 |
+
# Notes
|
| 8 |
+
app.include_router(notes_text.router)
|
| 9 |
+
app.include_router(notes_audio.router)
|
| 10 |
+
app.include_router(notes_get.router)
|
| 11 |
+
app.include_router(notes_update.router)
|
| 12 |
+
app.include_router(notes_regenerate.router)
|
| 13 |
+
|
| 14 |
+
# Folders
|
| 15 |
+
app.include_router(folders_create.router)
|
| 16 |
+
app.include_router(folders_get.router)
|
| 17 |
+
app.include_router(folders_update.router)
|
| 18 |
+
app.include_router(folders_delete.router)
|
| 19 |
|
| 20 |
@app.get("/health")
|
| 21 |
def health():
|
| 22 |
+
return {"status": "ok"}
|
app/models/enums.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
|
| 3 |
+
class NoteType(str, Enum):
|
| 4 |
+
audio = "audio"
|
| 5 |
+
text = "text"
|
| 6 |
+
|
| 7 |
+
class NoteStatus(str, Enum):
|
| 8 |
+
created = "created"
|
| 9 |
+
processing = "processing"
|
| 10 |
+
ready = "ready"
|
| 11 |
+
error = "error"
|
app/models/folder.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
|
| 3 |
+
class Folder(BaseModel):
|
| 4 |
+
folder_id: str
|
| 5 |
+
name: str
|
| 6 |
+
color_hex: str | None = None
|
| 7 |
+
user_id: str | None = None
|
| 8 |
+
created_at: int
|
| 9 |
+
updated_at: int
|
app/models/note.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import Optional, List, Dict
|
| 3 |
+
from app.models.enums import NoteType, NoteStatus
|
| 4 |
+
|
| 5 |
+
class AudioMetadata(BaseModel):
|
| 6 |
+
duration: Optional[float] = None
|
| 7 |
+
chunks: Optional[list] = None
|
| 8 |
+
sample_rate: Optional[int] = None
|
| 9 |
+
asr_model: Optional[str] = None
|
| 10 |
+
|
| 11 |
+
class NoteMetadata(BaseModel):
|
| 12 |
+
audio: Optional[AudioMetadata] = None
|
| 13 |
+
client: Optional[Dict] = None
|
| 14 |
+
|
| 15 |
+
class Note(BaseModel):
|
| 16 |
+
note_id: str
|
| 17 |
+
type: NoteType
|
| 18 |
+
|
| 19 |
+
title: Optional[str] = None
|
| 20 |
+
raw_text: Optional[str] = None
|
| 21 |
+
normalized_text: Optional[str] = None
|
| 22 |
+
keywords: Optional[List[str]] = None
|
| 23 |
+
summary: Optional[str] = None
|
| 24 |
+
mindmap: Optional[Dict] = None
|
| 25 |
+
|
| 26 |
+
folder_id: Optional[str] = None
|
| 27 |
+
metadata: Optional[NoteMetadata] = None
|
| 28 |
+
|
| 29 |
+
status: NoteStatus
|
| 30 |
+
created_at: int
|
| 31 |
+
updated_at: int
|
app/services/{mindmap_service.py → enrichment/mindmap.py}
RENAMED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
import asyncio
|
| 2 |
-
import json
|
| 3 |
-
import logging
|
| 4 |
-
import random
|
| 5 |
import re
|
| 6 |
-
import
|
| 7 |
-
|
| 8 |
-
from
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
try:
|
| 11 |
import google.genai as genai
|
|
@@ -23,8 +23,7 @@ try:
|
|
| 23 |
except Exception:
|
| 24 |
GoogleAPIError = Exception
|
| 25 |
|
| 26 |
-
|
| 27 |
-
_gemini_client = None
|
| 28 |
|
| 29 |
if not genai:
|
| 30 |
logging.warning("[mindmap_service] google.genai not available, mindmap generation will be disabled")
|
|
@@ -32,23 +31,18 @@ elif not GEMINI_API_KEY:
|
|
| 32 |
logging.warning("[mindmap_service] GEMINI_API_KEY is not set, mindmap generation will be disabled")
|
| 33 |
else:
|
| 34 |
try:
|
| 35 |
-
|
| 36 |
-
logging.info(f"[mindmap_service] Initialized google.genai client with model={
|
| 37 |
except Exception as e:
|
| 38 |
logging.exception(f"[mindmap_service] Failed to init google.genai client: {e}")
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
async def generate_mindmap(text: str) -> dict:
|
| 43 |
-
|
| 44 |
-
Fallback: trả {{}} nếu không có model hoặc lỗi.
|
| 45 |
-
"""
|
| 46 |
-
if not _gemini_client:
|
| 47 |
return {}
|
| 48 |
|
| 49 |
prompt = f"""
|
| 50 |
Bạn là chuyên gia tạo Sơ đồ tư duy. Hãy phân tích văn bản sau và tạo CẤU TRÚC JSON Mindmap.
|
| 51 |
-
|
| 52 |
Yêu cầu:
|
| 53 |
1. Xác định Ý chính làm Root.
|
| 54 |
2. Phân tách ý phụ thành nhánh con (tối đa 3 cấp).
|
|
@@ -57,7 +51,6 @@ Yêu cầu:
|
|
| 57 |
- Root: "#6200EE"
|
| 58 |
- Các nhánh con: sử dụng một trong các màu: "#F59E2B", "#2ECF9A", "#2F9BFF"
|
| 59 |
5. CHỈ TRẢ VỀ JSON, không giải thích thêm.
|
| 60 |
-
|
| 61 |
Cấu trúc JSON bắt buộc:
|
| 62 |
{{
|
| 63 |
"root": {{
|
|
@@ -85,8 +78,8 @@ Văn bản:
|
|
| 85 |
last_exc = None
|
| 86 |
for attempt in range(1, MAX_RETRIES + 1):
|
| 87 |
try:
|
| 88 |
-
resp =
|
| 89 |
-
model=
|
| 90 |
contents=prompt,
|
| 91 |
)
|
| 92 |
return resp.text or ""
|
|
@@ -160,4 +153,3 @@ Văn bản:
|
|
| 160 |
return fallback
|
| 161 |
except Exception:
|
| 162 |
return {}
|
| 163 |
-
|
|
|
|
| 1 |
import asyncio
|
|
|
|
|
|
|
|
|
|
| 2 |
import re
|
| 3 |
+
import logging
|
| 4 |
+
from random import random
|
| 5 |
+
from time import time
|
| 6 |
+
import google.genai as genai
|
| 7 |
+
import json
|
| 8 |
+
from app.config import GEMINI_API_KEY, GEMINI_MODEL
|
| 9 |
|
| 10 |
try:
|
| 11 |
import google.genai as genai
|
|
|
|
| 23 |
except Exception:
|
| 24 |
GoogleAPIError = Exception
|
| 25 |
|
| 26 |
+
gemini_client = None
|
|
|
|
| 27 |
|
| 28 |
if not genai:
|
| 29 |
logging.warning("[mindmap_service] google.genai not available, mindmap generation will be disabled")
|
|
|
|
| 31 |
logging.warning("[mindmap_service] GEMINI_API_KEY is not set, mindmap generation will be disabled")
|
| 32 |
else:
|
| 33 |
try:
|
| 34 |
+
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
|
| 35 |
+
logging.info(f"[mindmap_service] Initialized google.genai client with model={GEMINI_MODEL}")
|
| 36 |
except Exception as e:
|
| 37 |
logging.exception(f"[mindmap_service] Failed to init google.genai client: {e}")
|
| 38 |
+
gemini_client = None
|
|
|
|
| 39 |
|
| 40 |
async def generate_mindmap(text: str) -> dict:
|
| 41 |
+
if not text:
|
|
|
|
|
|
|
|
|
|
| 42 |
return {}
|
| 43 |
|
| 44 |
prompt = f"""
|
| 45 |
Bạn là chuyên gia tạo Sơ đồ tư duy. Hãy phân tích văn bản sau và tạo CẤU TRÚC JSON Mindmap.
|
|
|
|
| 46 |
Yêu cầu:
|
| 47 |
1. Xác định Ý chính làm Root.
|
| 48 |
2. Phân tách ý phụ thành nhánh con (tối đa 3 cấp).
|
|
|
|
| 51 |
- Root: "#6200EE"
|
| 52 |
- Các nhánh con: sử dụng một trong các màu: "#F59E2B", "#2ECF9A", "#2F9BFF"
|
| 53 |
5. CHỈ TRẢ VỀ JSON, không giải thích thêm.
|
|
|
|
| 54 |
Cấu trúc JSON bắt buộc:
|
| 55 |
{{
|
| 56 |
"root": {{
|
|
|
|
| 78 |
last_exc = None
|
| 79 |
for attempt in range(1, MAX_RETRIES + 1):
|
| 80 |
try:
|
| 81 |
+
resp = gemini_client.models.generate_content(
|
| 82 |
+
model=GEMINI_MODEL,
|
| 83 |
contents=prompt,
|
| 84 |
)
|
| 85 |
return resp.text or ""
|
|
|
|
| 153 |
return fallback
|
| 154 |
except Exception:
|
| 155 |
return {}
|
|
|
app/services/enrichment/normalize.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
import logging
|
| 4 |
+
import json
|
| 5 |
+
import re
|
| 6 |
+
import time
|
| 7 |
+
import random
|
| 8 |
+
from app.config import GEMINI_API_KEY, GEMINI_MODEL
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
import google.genai as genai
|
| 12 |
+
try:
|
| 13 |
+
from google.genai import errors as genai_errors
|
| 14 |
+
except Exception:
|
| 15 |
+
genai_errors = None
|
| 16 |
+
except Exception:
|
| 17 |
+
genai = None
|
| 18 |
+
genai_errors = None
|
| 19 |
+
logging.warning("[normalize_service] google.genai module not found; normalization disabled")
|
| 20 |
+
|
| 21 |
+
try:
|
| 22 |
+
from google.api_core.exceptions import GoogleAPIError
|
| 23 |
+
except Exception:
|
| 24 |
+
GoogleAPIError = Exception
|
| 25 |
+
|
| 26 |
+
gemini_client = None
|
| 27 |
+
|
| 28 |
+
if not genai:
|
| 29 |
+
logging.warning("[normalize_service] google.genai not available, normalization will be disabled")
|
| 30 |
+
elif not GEMINI_API_KEY:
|
| 31 |
+
logging.warning("[normalize_service] GEMINI_API_KEY is not set, normalization will be disabled")
|
| 32 |
+
else:
|
| 33 |
+
try:
|
| 34 |
+
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
|
| 35 |
+
logging.info(f"[normalize_service] Initialized google.genai client with model={GEMINI_MODEL}")
|
| 36 |
+
except Exception as e:
|
| 37 |
+
logging.exception(f"[normalize_service] Failed to init google.genai client: {e}")
|
| 38 |
+
gemini_client = None
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
async def normalize_text(raw_text: str) -> str:
|
| 42 |
+
if not raw_text:
|
| 43 |
+
return raw_text
|
| 44 |
+
|
| 45 |
+
prompt = f"""
|
| 46 |
+
Bạn là một hệ thống Xử lý Hậu kỳ NLP (NLP Post-Processing) Tiếng Việt.
|
| 47 |
+
Đầu vào là văn bản thô (raw transcript), có thể thiếu dấu câu và sai chính tả do nhận dạng giọng nói.
|
| 48 |
+
|
| 49 |
+
Nhiệm vụ:
|
| 50 |
+
- Sửa lỗi chính tả do ASR.
|
| 51 |
+
- Thêm dấu câu phù hợp.
|
| 52 |
+
- Viết hoa đúng chuẩn tiếng Việt (đầu câu, tên riêng nếu suy luận được).
|
| 53 |
+
- Loại bỏ các từ/cụm từ bị lặp lại vô nghĩa.
|
| 54 |
+
- Giữ nguyên nội dung và ý nghĩa gốc, không rút gọn, không thêm thông tin mới.
|
| 55 |
+
|
| 56 |
+
YÊU CẦU ĐẦU RA:
|
| 57 |
+
- Chỉ trả về văn bản đã chuẩn hóa
|
| 58 |
+
- KHÔNG JSON, KHÔNG giải thích, KHÔNG markdown
|
| 59 |
+
|
| 60 |
+
Văn bản đầu vào:
|
| 61 |
+
\"\"\"{raw_text}\"\"\"
|
| 62 |
+
"""
|
| 63 |
+
|
| 64 |
+
loop = asyncio.get_event_loop()
|
| 65 |
+
|
| 66 |
+
MAX_RETRIES = 3
|
| 67 |
+
BASE_DELAY = 1.0
|
| 68 |
+
|
| 69 |
+
def call():
|
| 70 |
+
last_exc = None
|
| 71 |
+
for attempt in range(1, MAX_RETRIES + 1):
|
| 72 |
+
try:
|
| 73 |
+
if gemini_client:
|
| 74 |
+
resp = gemini_client.models.generate_content(
|
| 75 |
+
model=GEMINI_MODEL,
|
| 76 |
+
contents=prompt,
|
| 77 |
+
)
|
| 78 |
+
return resp.text or ""
|
| 79 |
+
else:
|
| 80 |
+
model = genai.GenerativeModel(GEMINI_MODEL) if genai else None
|
| 81 |
+
if model:
|
| 82 |
+
resp = model.generate_content(prompt)
|
| 83 |
+
return getattr(resp, "text", "") or ""
|
| 84 |
+
return ""
|
| 85 |
+
except Exception as e:
|
| 86 |
+
last_exc = e
|
| 87 |
+
msg = str(e)
|
| 88 |
+
if "503" in msg or "UNAVAILABLE" in msg:
|
| 89 |
+
if attempt < MAX_RETRIES:
|
| 90 |
+
delay = BASE_DELAY * (2 ** (attempt - 1))
|
| 91 |
+
delay += random.uniform(0, 0.5 * delay)
|
| 92 |
+
logging.warning(
|
| 93 |
+
f"[normalize_service] model overloaded "
|
| 94 |
+
f"(attempt {attempt}/{MAX_RETRIES}), retrying after {delay:.2f}s"
|
| 95 |
+
)
|
| 96 |
+
time.sleep(delay)
|
| 97 |
+
continue
|
| 98 |
+
logging.exception(
|
| 99 |
+
f"[normalize_service] normalize call failed on attempt {attempt}: {e}"
|
| 100 |
+
)
|
| 101 |
+
break
|
| 102 |
+
|
| 103 |
+
if last_exc:
|
| 104 |
+
raise last_exc
|
| 105 |
+
return ""
|
| 106 |
+
|
| 107 |
+
try:
|
| 108 |
+
raw = await loop.run_in_executor(None, call)
|
| 109 |
+
|
| 110 |
+
if raw:
|
| 111 |
+
text = raw.strip()
|
| 112 |
+
|
| 113 |
+
# defensive cleanup (trΖ°α»ng hợp model vαΊ«n lα»‘ trαΊ£ markdown)
|
| 114 |
+
text = re.sub(r"^```.*?\n", "", text, flags=re.DOTALL)
|
| 115 |
+
text = re.sub(r"```$", "", text)
|
| 116 |
+
text = text.strip('"').strip("'").strip()
|
| 117 |
+
|
| 118 |
+
if text:
|
| 119 |
+
return text
|
| 120 |
+
|
| 121 |
+
except GoogleAPIError as e:
|
| 122 |
+
logging.error(f"[normalize_service] Gemini API error: {e}")
|
| 123 |
+
except Exception as e:
|
| 124 |
+
logging.exception(f"[normalize_service] normalize_text failed: {e}")
|
| 125 |
+
|
| 126 |
+
# ===== fallback: best-effort local normalization =====
|
| 127 |
+
try:
|
| 128 |
+
text = raw_text.strip()
|
| 129 |
+
text = re.sub(r"\s+", " ", text)
|
| 130 |
+
|
| 131 |
+
if text and text[-1] not in ".!?":
|
| 132 |
+
text += "."
|
| 133 |
+
|
| 134 |
+
def cap_sentences(s: str) -> str:
|
| 135 |
+
parts = re.split(r'([.!?]\s+)', s)
|
| 136 |
+
out = ""
|
| 137 |
+
for i in range(0, len(parts), 2):
|
| 138 |
+
sentence = parts[i].strip()
|
| 139 |
+
sep = parts[i + 1] if i + 1 < len(parts) else ""
|
| 140 |
+
if sentence:
|
| 141 |
+
sentence = sentence[0].upper() + sentence[1:]
|
| 142 |
+
out += sentence + sep
|
| 143 |
+
return out
|
| 144 |
+
|
| 145 |
+
return cap_sentences(text)
|
| 146 |
+
|
| 147 |
+
except Exception:
|
| 148 |
+
return raw_text
|
app/services/enrichment/pipeline.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.services.enrichment.normalize import normalize_text
|
| 2 |
+
from app.services.enrichment.title_keywords import extract_title_and_keywords
|
| 3 |
+
from app.services.enrichment.summary import generate_summary
|
| 4 |
+
from app.services.enrichment.mindmap import generate_mindmap
|
| 5 |
+
|
| 6 |
+
async def run_pipeline(note: dict, tasks: list[str]):
    """Run the requested enrichment steps on ``note`` and return it.

    Steps run in a fixed order (normalize, title + keywords, summary,
    mindmap); later steps consume the normalized text when normalization
    was requested. Each step is best-effort: a failure is logged and the
    remaining steps still run.

    Args:
        note: Note document; must carry the transcript under ``raw_text``.
            It is updated in place.
        tasks: Names of the steps to run, any of ``"normalize"``,
            ``"keywords"``, ``"summary"``, ``"mindmap"``. ``None`` is
            treated as "no steps".

    Returns:
        The same ``note`` object, enriched with whatever steps succeeded.
    """
    # Local import: this module does not import logging at the top.
    import logging

    raw_text = note.get("raw_text") or ""
    if not raw_text.strip():
        # Nothing to process
        return note

    tasks = tasks or []
    text = raw_text

    # 1. Normalize
    if "normalize" in tasks:
        try:
            text = await normalize_text(text)
        except Exception:
            # Fail-safe: keep raw_text as the working text.
            logging.exception("[pipeline] normalize step failed; using raw text")
        note["normalized_text"] = text

    # 2. Title + keywords (same AI call)
    if "keywords" in tasks:
        try:
            title, keywords = await extract_title_and_keywords(text)
            if title:
                note["title"] = title
            if keywords:
                note["keywords"] = keywords
        except Exception:
            # Fail-safe: skip title & keywords
            logging.exception("[pipeline] title/keywords step failed; skipping")

    # 3. Summary
    if "summary" in tasks:
        try:
            note["summary"] = await generate_summary(text)
        except Exception:
            logging.exception("[pipeline] summary step failed; skipping")

    # 4. Mindmap
    if "mindmap" in tasks:
        try:
            note["mindmap"] = await generate_mindmap(text)
        except Exception:
            logging.exception("[pipeline] mindmap step failed; skipping")

    return note
|
app/services/{summary_service.py β enrichment/summary.py}
RENAMED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
import asyncio
|
|
|
|
| 2 |
import logging
|
| 3 |
import random
|
| 4 |
-
import re
|
| 5 |
import time
|
|
|
|
| 6 |
|
| 7 |
-
from app.config import GEMINI_API_KEY
|
| 8 |
|
| 9 |
try:
|
| 10 |
import google.genai as genai
|
|
@@ -15,45 +16,41 @@ try:
|
|
| 15 |
except Exception:
|
| 16 |
genai = None
|
| 17 |
genai_errors = None
|
| 18 |
-
logging.warning("[summary_service] google.genai module not found;
|
| 19 |
|
| 20 |
try:
|
| 21 |
from google.api_core.exceptions import GoogleAPIError
|
| 22 |
except Exception:
|
| 23 |
GoogleAPIError = Exception
|
| 24 |
|
| 25 |
-
|
| 26 |
-
_gemini_client = None
|
| 27 |
|
| 28 |
if not genai:
|
| 29 |
-
logging.warning("[summary_service] google.genai not available, summary will be
|
| 30 |
elif not GEMINI_API_KEY:
|
| 31 |
-
logging.warning("[summary_service] GEMINI_API_KEY is not set, summary will be
|
| 32 |
else:
|
| 33 |
try:
|
| 34 |
-
|
| 35 |
-
logging.info(f"[summary_service] Initialized google.genai client with model={
|
| 36 |
except Exception as e:
|
| 37 |
logging.exception(f"[summary_service] Failed to init google.genai client: {e}")
|
| 38 |
-
|
| 39 |
-
|
| 40 |
|
| 41 |
async def generate_summary(text: str) -> str:
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
if not
|
| 46 |
return ""
|
| 47 |
|
| 48 |
prompt = f"""
|
| 49 |
BαΊ‘n lΓ chuyΓͺn gia tΓ³m tαΊ―t. HΓ£y tΓ³m tαΊ―t vΔn bαΊ£n sau thΓ nh mα»t ΔoαΊ‘n vΔn duy nhαΊ₯t.
|
| 50 |
-
|
| 51 |
YΓͺu cαΊ§u:
|
| 52 |
1. ViαΊΏt khoαΊ£ng 3-5 cΓ’u, tα»ng hợp ΔαΊ§y Δα»§ chα»§ Δα» vΓ cΓ‘c Γ½ chΓnh.
|
| 53 |
2. ViαΊΏt liα»n mαΊ‘ch, KHΓNG xuα»ng dΓ²ng, KHΓNG dΓΉng gαΊ‘ch ΔαΊ§u dΓ²ng hay ΔΓ‘nh sα».
|
| 54 |
3. Chα» dα»±a trΓͺn thΓ΄ng tin Δược cung cαΊ₯p, tuyα»t Δα»i KHΓNG tα»± thΓͺm thΓ΄ng tin bΓͺn ngoΓ i.
|
| 55 |
4. TrαΊ£ vα» VΔN BαΊ’N THUαΊ¦N (plain text), khΓ΄ng bα»c trong ``` hoαΊ·c JSON.
|
| 56 |
-
|
| 57 |
VΔn bαΊ£n:
|
| 58 |
\"\"\"{text}\"\"\"
|
| 59 |
"""
|
|
@@ -67,8 +64,8 @@ VΔn bαΊ£n:
|
|
| 67 |
last_exc = None
|
| 68 |
for attempt in range(1, MAX_RETRIES + 1):
|
| 69 |
try:
|
| 70 |
-
resp =
|
| 71 |
-
model=
|
| 72 |
contents=prompt,
|
| 73 |
)
|
| 74 |
return (resp.text or "").strip()
|
|
@@ -121,4 +118,4 @@ VΔn bαΊ£n:
|
|
| 121 |
logging.info("[summary_service] Returning fallback summary after errors")
|
| 122 |
return fallback
|
| 123 |
except Exception:
|
| 124 |
-
return ""
|
|
|
|
| 1 |
import asyncio
|
| 2 |
+
import os
|
| 3 |
import logging
|
| 4 |
import random
|
|
|
|
| 5 |
import time
|
| 6 |
+
import re
|
| 7 |
|
| 8 |
+
from app.config import GEMINI_API_KEY, GEMINI_MODEL
|
| 9 |
|
| 10 |
try:
|
| 11 |
import google.genai as genai
|
|
|
|
| 16 |
except Exception:
|
| 17 |
genai = None
|
| 18 |
genai_errors = None
|
| 19 |
+
logging.warning("[summary_service] google.genai module not found; summary generation disabled")
|
| 20 |
|
| 21 |
try:
|
| 22 |
from google.api_core.exceptions import GoogleAPIError
|
| 23 |
except Exception:
|
| 24 |
GoogleAPIError = Exception
|
| 25 |
|
| 26 |
+
gemini_client = None
|
|
|
|
| 27 |
|
| 28 |
if not genai:
|
| 29 |
+
logging.warning("[summary_service] google.genai not available, summary generation will be disabled")
|
| 30 |
elif not GEMINI_API_KEY:
|
| 31 |
+
logging.warning("[summary_service] GEMINI_API_KEY is not set, summary generation will be disabled")
|
| 32 |
else:
|
| 33 |
try:
|
| 34 |
+
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
|
| 35 |
+
logging.info(f"[summary_service] Initialized google.genai client with model={GEMINI_MODEL}")
|
| 36 |
except Exception as e:
|
| 37 |
logging.exception(f"[summary_service] Failed to init google.genai client: {e}")
|
| 38 |
+
gemini_client = None
|
|
|
|
| 39 |
|
| 40 |
async def generate_summary(text: str) -> str:
|
| 41 |
+
if not gemini_client:
|
| 42 |
+
return ""
|
| 43 |
+
|
| 44 |
+
if not text:
|
| 45 |
return ""
|
| 46 |
|
| 47 |
prompt = f"""
|
| 48 |
BαΊ‘n lΓ chuyΓͺn gia tΓ³m tαΊ―t. HΓ£y tΓ³m tαΊ―t vΔn bαΊ£n sau thΓ nh mα»t ΔoαΊ‘n vΔn duy nhαΊ₯t.
|
|
|
|
| 49 |
YΓͺu cαΊ§u:
|
| 50 |
1. ViαΊΏt khoαΊ£ng 3-5 cΓ’u, tα»ng hợp ΔαΊ§y Δα»§ chα»§ Δα» vΓ cΓ‘c Γ½ chΓnh.
|
| 51 |
2. ViαΊΏt liα»n mαΊ‘ch, KHΓNG xuα»ng dΓ²ng, KHΓNG dΓΉng gαΊ‘ch ΔαΊ§u dΓ²ng hay ΔΓ‘nh sα».
|
| 52 |
3. Chα» dα»±a trΓͺn thΓ΄ng tin Δược cung cαΊ₯p, tuyα»t Δα»i KHΓNG tα»± thΓͺm thΓ΄ng tin bΓͺn ngoΓ i.
|
| 53 |
4. TrαΊ£ vα» VΔN BαΊ’N THUαΊ¦N (plain text), khΓ΄ng bα»c trong ``` hoαΊ·c JSON.
|
|
|
|
| 54 |
VΔn bαΊ£n:
|
| 55 |
\"\"\"{text}\"\"\"
|
| 56 |
"""
|
|
|
|
| 64 |
last_exc = None
|
| 65 |
for attempt in range(1, MAX_RETRIES + 1):
|
| 66 |
try:
|
| 67 |
+
resp = gemini_client.models.generate_content(
|
| 68 |
+
model=GEMINI_MODEL,
|
| 69 |
contents=prompt,
|
| 70 |
)
|
| 71 |
return (resp.text or "").strip()
|
|
|
|
| 118 |
logging.info("[summary_service] Returning fallback summary after errors")
|
| 119 |
return fallback
|
| 120 |
except Exception:
|
| 121 |
+
return ""
|
app/services/enrichment/title_keywords.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import logging
|
| 3 |
+
import json
|
| 4 |
+
import asyncio
|
| 5 |
+
import time
|
| 6 |
+
import random
|
| 7 |
+
import re
|
| 8 |
+
from app.config import GEMINI_API_KEY, GEMINI_MODEL
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
import google.genai as genai
|
| 12 |
+
try:
|
| 13 |
+
from google.genai import errors as genai_errors
|
| 14 |
+
except Exception:
|
| 15 |
+
genai_errors = None
|
| 16 |
+
except Exception:
|
| 17 |
+
genai = None
|
| 18 |
+
genai_errors = None
|
| 19 |
+
logging.warning("[keywords_service] google.genai module not found; keyword extraction disabled")
|
| 20 |
+
|
| 21 |
+
try:
|
| 22 |
+
from google.api_core.exceptions import GoogleAPIError
|
| 23 |
+
except Exception:
|
| 24 |
+
GoogleAPIError = Exception
|
| 25 |
+
|
| 26 |
+
gemini_client = None
|
| 27 |
+
|
| 28 |
+
if not genai:
|
| 29 |
+
logging.warning("[keywords_service] google.genai not available, keyword extraction will be disabled")
|
| 30 |
+
elif not GEMINI_API_KEY:
|
| 31 |
+
logging.warning("[keywords_service] GEMINI_API_KEY is not set, keyword extraction will be disabled")
|
| 32 |
+
else:
|
| 33 |
+
try:
|
| 34 |
+
gemini_client = genai.Client(api_key=GEMINI_API_KEY)
|
| 35 |
+
logging.info(f"[keywords_service] Initialized google.genai client with model={GEMINI_MODEL}")
|
| 36 |
+
except Exception as e:
|
| 37 |
+
logging.exception(f"[keywords_service] Failed to init google.genai client: {e}")
|
| 38 |
+
gemini_client = None
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
async def extract_title_and_keywords(text: str) -> tuple[str | None, list[str]]:
|
| 42 |
+
if not text or not text.strip():
|
| 43 |
+
return None, []
|
| 44 |
+
|
| 45 |
+
if not gemini_client and not genai:
|
| 46 |
+
# AI not available β safe fallback
|
| 47 |
+
return None, []
|
| 48 |
+
|
| 49 |
+
prompt = f"""
|
| 50 |
+
BαΊ‘n lΓ mα»t hα» thα»ng Xα» lΓ½ HαΊu kα»³ NLP (NLP Post-Processing) TiαΊΏng Viα»t.
|
| 51 |
+
|
| 52 |
+
Nhiα»m vα»₯:
|
| 53 |
+
1. Sinh **tiΓͺu Δα» (title)** ngαΊ―n gα»n phαΊ£n Γ‘nh ΔΓΊng chα»§ Δα» chΓnh cα»§a vΔn bαΊ£n:
|
| 54 |
+
- Δα» dΓ i tα»i Δa **10 tα»«**
|
| 55 |
+
- Mang tΓnh mΓ΄ tαΊ£, trung tΓnh, phΓΉ hợp lΓ m tiΓͺu Δα» ghi chΓΊ (note)
|
| 56 |
+
- KHΓNG giαΊt tΓt, KHΓNG suy diα»
n quΓ‘ mα»©c
|
| 57 |
+
|
| 58 |
+
2. RΓΊt trΓch cΓ‘c **tα»« khΓ³a quan trα»ng** phαΊ£n Γ‘nh ΔΓΊng **chα»§ Δα» vΓ nα»i dung chΓnh** cα»§a vΔn bαΊ£n.
|
| 59 |
+
- Mα»i tα»« khΓ³a dΓ i tα»« **1β4 tα»«**.
|
| 60 |
+
- Ζ―u tiΓͺn danh tα»«, cα»₯m danh tα»«, thuαΊt ngα»―, khΓ‘i niα»m chΓnh.
|
| 61 |
+
- LoαΊ‘i bα» tα»« chung chung, tα»« Δα»m, tα»« cαΊ£m thΓ‘n, tα»« lαΊ·p nghΔ©a.
|
| 62 |
+
- KHΓNG diα»
n giαΊ£i, KHΓNG tΓ³m tαΊ―t, KHΓNG chuαΊ©n hΓ³a lαΊ‘i vΔn bαΊ£n.
|
| 63 |
+
- KHΓNG tαΊ‘o tα»« khΓ³a khΓ΄ng xuαΊ₯t hiα»n hoαΊ·c khΓ΄ng suy luαΊn hợp lΓ½ tα»« vΔn bαΊ£n.
|
| 64 |
+
|
| 65 |
+
Quy tαΊ―c:
|
| 66 |
+
- Sα» lượng tα»« khΓ³a: 3β10 (tΓΉy Δα» dΓ i vΓ nα»i dung vΔn bαΊ£n).
|
| 67 |
+
- Giα»― nguyΓͺn chα»― thΖ°α»ng/hoa theo cΓ‘ch viαΊΏt phα» biαΊΏn.
|
| 68 |
+
- KHΓNG trΓΉng lαΊ·p tα»« khΓ³a.
|
| 69 |
+
- KHΓNG sαΊ―p xαΊΏp theo bαΊ£ng chα»― cΓ‘i; Ζ°u tiΓͺn theo mα»©c Δα» quan trα»ng.
|
| 70 |
+
|
| 71 |
+
VΔn bαΊ£n ΔαΊ§u vΓ o:
|
| 72 |
+
\"\"\"{text}\"\"\"
|
| 73 |
+
|
| 74 |
+
YΓU CαΊ¦U ΔαΊ¦U RA:
|
| 75 |
+
- Chα» trαΊ£ vα» **JSON hợp lα»**
|
| 76 |
+
- KHΓNG giαΊ£i thΓch
|
| 77 |
+
- KHΓNG markdown
|
| 78 |
+
- KHΓNG thΓͺm trΖ°α»ng khΓ‘c ngoΓ i schema dΖ°α»i ΔΓ’y
|
| 79 |
+
|
| 80 |
+
CαΊ₯u trΓΊc JSON bαΊ―t buα»c:
|
| 81 |
+
{{
|
| 82 |
+
"title": "TiΓͺu Δα» ngαΊ―n gα»n",
|
| 83 |
+
"keywords": ["Tα»« khΓ³a 1", "Tα»« khΓ³a 2", "..."]
|
| 84 |
+
}}
|
| 85 |
+
"""
|
| 86 |
+
|
| 87 |
+
loop = asyncio.get_event_loop()
|
| 88 |
+
|
| 89 |
+
MAX_RETRIES = 3
|
| 90 |
+
BASE_DELAY = 1.0
|
| 91 |
+
|
| 92 |
+
def call():
|
| 93 |
+
last_exc = None
|
| 94 |
+
for attempt in range(1, MAX_RETRIES + 1):
|
| 95 |
+
try:
|
| 96 |
+
if gemini_client:
|
| 97 |
+
resp = gemini_client.models.generate_content(
|
| 98 |
+
model=GEMINI_MODEL,
|
| 99 |
+
contents=prompt,
|
| 100 |
+
)
|
| 101 |
+
return getattr(resp, "text", "") or ""
|
| 102 |
+
else:
|
| 103 |
+
model = genai.GenerativeModel(GEMINI_MODEL) if genai else None
|
| 104 |
+
if model:
|
| 105 |
+
resp = model.generate_content(prompt)
|
| 106 |
+
return getattr(resp, "text", "") or ""
|
| 107 |
+
return ""
|
| 108 |
+
except Exception as e:
|
| 109 |
+
last_exc = e
|
| 110 |
+
is_server_error = False
|
| 111 |
+
try:
|
| 112 |
+
if genai_errors and isinstance(e, genai_errors.ServerError):
|
| 113 |
+
is_server_error = True
|
| 114 |
+
except Exception:
|
| 115 |
+
pass
|
| 116 |
+
|
| 117 |
+
msg = str(e)
|
| 118 |
+
if "503" in msg or "UNAVAILABLE" in msg or is_server_error:
|
| 119 |
+
if attempt < MAX_RETRIES:
|
| 120 |
+
delay = BASE_DELAY * (2 ** (attempt - 1))
|
| 121 |
+
delay = delay + random.uniform(0, 0.5 * delay)
|
| 122 |
+
logging.warning(f"[keywords_service] model overloaded (attempt {attempt}/{MAX_RETRIES}), retrying after {delay:.2f}s")
|
| 123 |
+
time.sleep(delay)
|
| 124 |
+
continue
|
| 125 |
+
logging.exception(f"[keywords_service] extract_keywords call failed on attempt {attempt}: {e}")
|
| 126 |
+
break
|
| 127 |
+
|
| 128 |
+
if last_exc:
|
| 129 |
+
raise last_exc
|
| 130 |
+
return ""
|
| 131 |
+
|
| 132 |
+
try:
|
| 133 |
+
raw = await loop.run_in_executor(None, call)
|
| 134 |
+
title, keywords = _parse_response(raw)
|
| 135 |
+
return title, keywords
|
| 136 |
+
except GoogleAPIError as e:
|
| 137 |
+
logging.error("[title_keywords_service] Gemini API error: %s", e)
|
| 138 |
+
except Exception as e:
|
| 139 |
+
logging.exception("[title_keywords_service] extract failed: %s", e)
|
| 140 |
+
|
| 141 |
+
return None, []
|
| 142 |
+
|
| 143 |
+
def _parse_response(raw: str) -> tuple[str | None, list[str]]:
|
| 144 |
+
if not raw:
|
| 145 |
+
return None, []
|
| 146 |
+
|
| 147 |
+
raw = raw.strip()
|
| 148 |
+
|
| 149 |
+
# Try extracting JSON block
|
| 150 |
+
start = raw.find("{")
|
| 151 |
+
end = raw.rfind("}")
|
| 152 |
+
|
| 153 |
+
if start != -1 and end != -1 and end > start:
|
| 154 |
+
raw_json = raw[start : end + 1]
|
| 155 |
+
else:
|
| 156 |
+
raw_json = raw
|
| 157 |
+
|
| 158 |
+
try:
|
| 159 |
+
parsed = json.loads(raw_json)
|
| 160 |
+
except Exception as e:
|
| 161 |
+
logging.warning(
|
| 162 |
+
"[title_keywords_service] Failed to parse JSON: %s | raw=%r",
|
| 163 |
+
e,
|
| 164 |
+
raw[:300],
|
| 165 |
+
)
|
| 166 |
+
return None, []
|
| 167 |
+
|
| 168 |
+
title = parsed.get("title")
|
| 169 |
+
keywords = parsed.get("keywords")
|
| 170 |
+
|
| 171 |
+
# Validate title
|
| 172 |
+
if not isinstance(title, str) or not title.strip():
|
| 173 |
+
title = None
|
| 174 |
+
else:
|
| 175 |
+
title = title.strip()
|
| 176 |
+
|
| 177 |
+
# Validate keywords
|
| 178 |
+
if not isinstance(keywords, list):
|
| 179 |
+
keywords = []
|
| 180 |
+
else:
|
| 181 |
+
keywords = [
|
| 182 |
+
k.strip()
|
| 183 |
+
for k in keywords
|
| 184 |
+
if isinstance(k, str) and k.strip()
|
| 185 |
+
]
|
| 186 |
+
|
| 187 |
+
return title, keywords
|
app/services/folder_store.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.infra.firebase import db
|
| 2 |
+
|
| 3 |
+
COL = "folders"
|
| 4 |
+
|
| 5 |
+
def create_folder(folder: dict):
    """Store ``folder`` in the folders collection under its ``folder_id``.

    ``set`` is called on the document reference, so the given dict becomes
    the document's content.
    """
    folder_ref = db.collection(COL).document(folder["folder_id"])
    folder_ref.set(folder)
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def get_folder(folder_id: str):
    """Return the folder document as a dict, or ``None`` if it does not exist."""
    snapshot = db.collection(COL).document(folder_id).get()
    if not snapshot.exists:
        return None
    return snapshot.to_dict()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def list_folders():
    """Return every document of the folders collection as a list of dicts."""
    folders = []
    for snapshot in db.collection(COL).stream():
        folders.append(snapshot.to_dict())
    return folders
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def update_folder(folder_id: str, data: dict):
    """Apply the field updates in ``data`` to the folder ``folder_id``.

    An empty or missing payload is a no-op, matching ``update_note`` in the
    note store, so the store is never asked to perform an update with no
    fields.
    """
    if not data:
        return
    db.collection(COL).document(folder_id).update(data)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def delete_folder(folder_id: str):
    """Delete the folder document identified by ``folder_id``."""
    folder_ref = db.collection(COL).document(folder_id)
    folder_ref.delete()
|
app/services/note_store.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from app.infra.firebase import db
|
| 2 |
+
|
| 3 |
+
COL = "notes"
|
| 4 |
+
|
| 5 |
+
def create_note(note: dict):
    """Store ``note`` under its ``note_id``, refusing to overwrite.

    Raises:
        ValueError: If a document with the same ``note_id`` already exists.
    """
    note_id = note["note_id"]
    note_ref = db.collection(COL).document(note_id)
    snapshot = note_ref.get()
    if snapshot.exists:
        raise ValueError(f"Note already exists: {note_id}")
    note_ref.set(note)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def update_note(note_id: str, data: dict):
    """Apply the field updates in ``data`` to the note ``note_id``.

    Does nothing when ``data`` is empty or ``None``.
    """
    if data:
        db.collection(COL).document(note_id).update(data)
|
| 16 |
+
|
| 17 |
+
def get_note(note_id: str):
    """Return the note document as a dict, or ``None`` if it does not exist."""
    snapshot = db.collection(COL).document(note_id).get()
    if not snapshot.exists:
        return None
    return snapshot.to_dict()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def list_notes(folder_id: str | None = None):
    """Return notes as a list of dicts, optionally limited to one folder.

    When ``folder_id`` is truthy only notes whose ``folder_id`` field equals
    it are returned; otherwise the whole collection is streamed.
    """
    query = db.collection(COL)
    if folder_id:
        query = query.where("folder_id", "==", folder_id)

    notes = []
    for snapshot in query.stream():
        notes.append(snapshot.to_dict())
    return notes
|
app/services/storage.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
from datetime import datetime
|
| 2 |
-
from app.services.firebase import db
|
| 3 |
-
|
| 4 |
-
COLLECTION = "notes"
|
| 5 |
-
|
| 6 |
-
import logging
|
| 7 |
-
|
| 8 |
-
def create_note(note_id: str, payload: dict):
|
| 9 |
-
now = datetime.utcnow()
|
| 10 |
-
payload.update({
|
| 11 |
-
"status": "created",
|
| 12 |
-
"created_at": now,
|
| 13 |
-
"updated_at": now
|
| 14 |
-
})
|
| 15 |
-
logging.info(f"[NoteService] create_note: id={note_id}, keys={list(payload.keys())}")
|
| 16 |
-
db.collection(COLLECTION).document(note_id).set(payload)
|
| 17 |
-
logging.info(f"[NoteService] create_note: saved id={note_id}")
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def update_note(note_id: str, data: dict = None, status: str = None):
|
| 21 |
-
updates = {"updated_at": datetime.utcnow()}
|
| 22 |
-
if data:
|
| 23 |
-
updates.update(data)
|
| 24 |
-
if status:
|
| 25 |
-
updates["status"] = status
|
| 26 |
-
|
| 27 |
-
db.collection(COLLECTION).document(note_id).update(updates)
|
| 28 |
-
|
| 29 |
-
def get_note(note_id: str):
|
| 30 |
-
logging.info(f"[NoteService] get_note: id={note_id}")
|
| 31 |
-
doc = db.collection(COLLECTION).document(note_id).get()
|
| 32 |
-
logging.info(f"[NoteService] get_note: exists={doc.exists}")
|
| 33 |
-
return doc.to_dict() if doc.exists else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/utils/id.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uuid
|
| 2 |
+
|
| 3 |
+
def new_id() -> str:
    """Return a fresh random identifier: a UUID4 as 32 lowercase hex digits."""
    identifier = uuid.uuid4()
    return identifier.hex
|
app/utils/time.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
|
| 3 |
+
def now_ts() -> int:
    """Return the current Unix time in whole milliseconds."""
    millis = time.time() * 1000
    return int(millis)
|
requirements.txt
CHANGED
|
@@ -2,3 +2,4 @@ fastapi
|
|
| 2 |
uvicorn
|
| 3 |
google-genai
|
| 4 |
firebase-admin
|
|
|
|
|
|
| 2 |
uvicorn
|
| 3 |
google-genai
|
| 4 |
firebase-admin
|
| 5 |
+
pydantic
|