feat/ Endpoint Restructure
#5
by rhbt6767 - opened
- API_CONTRACT_BE_GOLANG.md +947 -0
- API_CONTRACT_BE_PYTHON.md +521 -0
- API_ENDPOINTS.md +0 -373
- API_ENDPOINTS_RESTRUCTURE.md +391 -0
- DEV_PLAN.md +41 -4
- REPO_STATUS.md +69 -19
- main.py +21 -13
- src/agents/chat_handler.py +49 -0
- src/api/v1/help.py +82 -0
- src/api/v1/report.py +9 -5
- src/api/v1/tools.py +4 -1
- src/api/v2/__init__.py +4 -0
- src/api/v2/chat.py +165 -0
API_CONTRACT_BE_GOLANG.md
ADDED
|
@@ -0,0 +1,947 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Frontend API Contract
|
| 2 |
+
|
| 3 |
+
Dokumen ini merangkum endpoint Orchestration Agent Service yang dipakai oleh frontend. Fokus flow:
|
| 4 |
+
|
| 5 |
+
1. User login dan menyimpan token.
|
| 6 |
+
2. User menyiapkan knowledge source: upload/proses file, connect database, ingest schema, dan rebuild data catalog.
|
| 7 |
+
3. Setelah knowledge siap, user membuat `new analysis` dengan judul, objective, business question, dan data source binding.
|
| 8 |
+
4. Frontend mengirim pertanyaan ke AI Agent Service terpisah.
|
| 9 |
+
5. Service ini hanya merekam riwayat tanya jawab ke `analyses_messages`.
|
| 10 |
+
|
| 11 |
+
Base URL lokal contoh: `http://localhost:8080`
|
| 12 |
+
|
| 13 |
+
## Konvensi
|
| 14 |
+
|
| 15 |
+
Semua endpoint protected wajib memakai:
|
| 16 |
+
|
| 17 |
+
```http
|
| 18 |
+
Authorization: Bearer <access_token>
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
Endpoint public:
|
| 22 |
+
|
| 23 |
+
- `GET /health`
|
| 24 |
+
- `POST /api/login`
|
| 25 |
+
- `POST /api/refresh`
|
| 26 |
+
|
| 27 |
+
Sebagian besar response memakai envelope:
|
| 28 |
+
|
| 29 |
+
```json
|
| 30 |
+
{
|
| 31 |
+
"status": "success",
|
| 32 |
+
"message": "human-readable message",
|
| 33 |
+
"data": {}
|
| 34 |
+
}
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
Error response:
|
| 38 |
+
|
| 39 |
+
```json
|
| 40 |
+
{
|
| 41 |
+
"status": "error",
|
| 42 |
+
"message": "error message",
|
| 43 |
+
"data": {
|
| 44 |
+
"code": "OPTIONAL_ERROR_CODE"
|
| 45 |
+
}
|
| 46 |
+
}
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
Catatan ownership: beberapa endpoint masih menerima `user_id` di body, path, atau query untuk kompatibilitas. Nilainya wajib sama dengan user dari Bearer token.
|
| 50 |
+
|
| 51 |
+
## Flow Frontend
|
| 52 |
+
|
| 53 |
+
### 1. Login
|
| 54 |
+
|
| 55 |
+
Frontend memanggil `POST /api/login`, lalu simpan:
|
| 56 |
+
|
| 57 |
+
- `data.user.id` sebagai `user_id`
|
| 58 |
+
- `data.access_token` untuk header Bearer
|
| 59 |
+
- `data.refresh_token` untuk refresh token rotation
|
| 60 |
+
|
| 61 |
+
`access_token` berlaku 1 jam. `refresh_token` berlaku 7 hari dan akan diganti setiap kali refresh sukses.
|
| 62 |
+
|
| 63 |
+
### 2. Refresh Token
|
| 64 |
+
|
| 65 |
+
Jika request protected menerima `401` karena token expired, panggil `POST /api/refresh` menggunakan refresh token terakhir. Setelah sukses, ganti access token dan refresh token lama dengan token baru dari response.
|
| 66 |
+
|
| 67 |
+
Refresh token lama tidak boleh dipakai lagi setelah refresh sukses.
|
| 68 |
+
|
| 69 |
+
### 3. Menyiapkan Knowledge Source
|
| 70 |
+
|
| 71 |
+
Frontend dapat menyediakan knowledge source dari dokumen dan/atau database.
|
| 72 |
+
|
| 73 |
+
Untuk dokumen:
|
| 74 |
+
|
| 75 |
+
1. Ambil tipe file yang didukung: `GET /api/v1/documents/doctypes`
|
| 76 |
+
2. Upload file: `POST /api/v1/document/upload`
|
| 77 |
+
3. Proses dokumen: `POST /api/v1/document/process`
|
| 78 |
+
4. Pantau status dokumen: `GET /api/v1/documents/{user_id}`
|
| 79 |
+
|
| 80 |
+
Untuk database:
|
| 81 |
+
|
| 82 |
+
1. Ambil tipe database dan schema form: `GET /api/v1/database-clients/dbtypes`
|
| 83 |
+
2. Simpan koneksi database: `POST /api/v1/database-clients`
|
| 84 |
+
3. Ingest schema database: `POST /api/v1/database-clients/{client_id}/ingest?user_id={user_id}`
|
| 85 |
+
4. Pantau koneksi: `GET /api/v1/database-clients/{user_id}`
|
| 86 |
+
|
| 87 |
+
Setelah dokumen/database siap, frontend dapat rebuild dan membaca user data catalog:
|
| 88 |
+
|
| 89 |
+
1. `POST /api/v1/data-catalog/rebuild`
|
| 90 |
+
2. `GET /api/v1/data-catalog/{user_id}`
|
| 91 |
+
|
| 92 |
+
### 4. Membuat New Analysis
|
| 93 |
+
|
| 94 |
+
Frontend menampilkan form:
|
| 95 |
+
|
| 96 |
+
- `analysis_title`
|
| 97 |
+
- `objective`
|
| 98 |
+
- `business_questions`
|
| 99 |
+
- `data_bind`
|
| 100 |
+
|
| 101 |
+
Request body `POST /api/v1/analyses` wajib menyertakan `analysis_title`, `objective`, `business_questions`, dan `data_bind`. `business_questions` berbentuk array string karena satu analysis dapat membawa lebih dari satu pertanyaan bisnis awal.
|
| 102 |
+
|
| 103 |
+
Flow yang direkomendasikan:
|
| 104 |
+
|
| 105 |
+
1. `POST /api/v1/analyses` dengan title, objective, business_questions, dan data_bind.
|
| 106 |
+
2. Ambil `data.id` dari response sebagai `analysis_id`.
|
| 107 |
+
3. Frontend memanggil AI Agent Service terpisah memakai context analysis, business_questions, dan catalog.
|
| 108 |
+
4. Saat user mulai bertanya ke AI Agent Service, rekam pertanyaan dengan `role=user` ke endpoint messages.
|
| 109 |
+
5. Setelah AI Agent Service menjawab, simpan jawaban dengan `role=ai` ke endpoint messages.
|
| 110 |
+
|
| 111 |
+
### 5. Conversation Recording
|
| 112 |
+
|
| 113 |
+
Endpoint message di service ini tidak memanggil AI agent, tidak melakukan reasoning, dan tidak membuat balasan otomatis.
|
| 114 |
+
|
| 115 |
+
Frontend bertanggung jawab melakukan dua write terpisah:
|
| 116 |
+
|
| 117 |
+
1. Rekam pertanyaan user:
|
| 118 |
+
|
| 119 |
+
```json
|
| 120 |
+
{
|
| 121 |
+
"role": "user",
|
| 122 |
+
"content": "Apa penyebab revenue turun di Q3?"
|
| 123 |
+
}
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
2. Setelah AI Agent Service menjawab, rekam jawaban agent:
|
| 127 |
+
|
| 128 |
+
```json
|
| 129 |
+
{
|
| 130 |
+
"role": "ai",
|
| 131 |
+
"content": "Revenue Q3 turun terutama karena penurunan volume transaksi di segmen enterprise..."
|
| 132 |
+
}
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
## Endpoint Ringkas
|
| 136 |
+
|
| 137 |
+
| Method | Path | Kegunaan |
|
| 138 |
+
| --- | --- | --- |
|
| 139 |
+
| `GET` | `/health` | Health check service |
|
| 140 |
+
| `POST` | `/api/login` | Login dan issue token pair |
|
| 141 |
+
| `POST` | `/api/refresh` | Rotate refresh token dan issue token pair baru |
|
| 142 |
+
| `GET` | `/api/v1/documents/doctypes` | List tipe dokumen yang didukung |
|
| 143 |
+
| `POST` | `/api/v1/document/upload` | Upload dokumen ke Azure Blob Storage |
|
| 144 |
+
| `POST` | `/api/v1/document/upload-local` | Upload dokumen ke local filesystem untuk benchmark |
|
| 145 |
+
| `POST` | `/api/v1/document/process` | Proses dokumen async |
|
| 146 |
+
| `GET` | `/api/v1/documents/{user_id}` | List dokumen milik user |
|
| 147 |
+
| `DELETE` | `/api/v1/document/delete` | Hapus dokumen |
|
| 148 |
+
| `GET` | `/api/v1/database-clients/dbtypes` | List tipe database dan schema credential form |
|
| 149 |
+
| `POST` | `/api/v1/database-clients` | Buat koneksi database |
|
| 150 |
+
| `GET` | `/api/v1/database-clients/{user_id}` | List koneksi database user |
|
| 151 |
+
| `GET` | `/api/v1/database-clients/{user_id}/{client_id}` | Detail koneksi database |
|
| 152 |
+
| `PUT` | `/api/v1/database-clients/{client_id}` | Update koneksi database |
|
| 153 |
+
| `DELETE` | `/api/v1/database-clients/{client_id}` | Hapus koneksi database |
|
| 154 |
+
| `POST` | `/api/v1/database-clients/{client_id}/ingest` | Introspect schema database ke catalog |
|
| 155 |
+
| `POST` | `/api/v1/data-catalog/rebuild` | Rebuild user data catalog |
|
| 156 |
+
| `GET` | `/api/v1/data-catalog/{user_id}` | Ambil user data catalog index |
|
| 157 |
+
| `POST` | `/api/v1/analyses` | Buat analysis baru |
|
| 158 |
+
| `GET` | `/api/v1/analyses` | List analysis user |
|
| 159 |
+
| `GET` | `/api/v1/analyses/{id}` | Detail analysis |
|
| 160 |
+
| `PATCH` | `/api/v1/analyses/{id}` | Update metadata/status analysis |
|
| 161 |
+
| `DELETE` | `/api/v1/analyses/{id}` | Hapus analysis |
|
| 162 |
+
| `PUT` | `/api/v1/analyses/{id}/data-bind` | Update data source binding analysis |
|
| 163 |
+
| `GET` | `/api/v1/analyses/{id}/data-catalog` | Ambil catalog yang scoped ke analysis |
|
| 164 |
+
| `POST` | `/api/v1/analyses/{id}/data-catalog/rebuild` | Rebuild catalog scoped ke analysis dari data_bind |
|
| 165 |
+
| `GET` | `/api/v1/analyses/{id}/messages` | Ambil riwayat pesan analysis |
|
| 166 |
+
| `POST` | `/api/v1/analyses/{id}/messages` | Rekam satu pesan conversation |
|
| 167 |
+
|
| 168 |
+
## Auth
|
| 169 |
+
|
| 170 |
+
### `POST /api/login`
|
| 171 |
+
|
| 172 |
+
Login user dengan email dan password.
|
| 173 |
+
|
| 174 |
+
Request:
|
| 175 |
+
|
| 176 |
+
```json
|
| 177 |
+
{
|
| 178 |
+
"email": "user@example.com",
|
| 179 |
+
"password": "password"
|
| 180 |
+
}
|
| 181 |
+
```
|
| 182 |
+
|
| 183 |
+
Success `200`:
|
| 184 |
+
|
| 185 |
+
```json
|
| 186 |
+
{
|
| 187 |
+
"status": "success",
|
| 188 |
+
"message": "login successful",
|
| 189 |
+
"data": {
|
| 190 |
+
"user": {
|
| 191 |
+
"id": "user-id",
|
| 192 |
+
"email": "user@example.com",
|
| 193 |
+
"fullname": "User Name",
|
| 194 |
+
"role": "user",
|
| 195 |
+
"status": "active"
|
| 196 |
+
},
|
| 197 |
+
"access_token": "jwt-access-token",
|
| 198 |
+
"refresh_token": "opaque-refresh-token",
|
| 199 |
+
"token_type": "Bearer",
|
| 200 |
+
"expires_in": 3600,
|
| 201 |
+
"refresh_expires_in": 604800
|
| 202 |
+
}
|
| 203 |
+
}
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
Errors: `400`, `401`, `403`, `404`, `500`.
|
| 207 |
+
|
| 208 |
+
### `POST /api/refresh`
|
| 209 |
+
|
| 210 |
+
Menukar refresh token aktif dengan token pair baru.
|
| 211 |
+
|
| 212 |
+
Request:
|
| 213 |
+
|
| 214 |
+
```json
|
| 215 |
+
{
|
| 216 |
+
"refresh_token": "opaque-refresh-token"
|
| 217 |
+
}
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
Success `200` mengembalikan bentuk `data` yang sama dengan login, berisi user, access token baru, refresh token baru, `token_type`, `expires_in`, dan `refresh_expires_in`.
|
| 221 |
+
|
| 222 |
+
Errors: `400`, `401`, `403`, `500`.
|
| 223 |
+
|
| 224 |
+
## Documents
|
| 225 |
+
|
| 226 |
+
### Document Model
|
| 227 |
+
|
| 228 |
+
```json
|
| 229 |
+
{
|
| 230 |
+
"id": "document-id",
|
| 231 |
+
"user_id": "user-id",
|
| 232 |
+
"filename": "sales.csv",
|
| 233 |
+
"blob_name": "user-id/document-id/sales.csv",
|
| 234 |
+
"file_size": 2048,
|
| 235 |
+
"file_type": "csv",
|
| 236 |
+
"status": "uploaded",
|
| 237 |
+
"chunks_count": 0,
|
| 238 |
+
"processed_at": "2026-06-30T08:00:00Z",
|
| 239 |
+
"error_message": null,
|
| 240 |
+
"created_at": "2026-06-30T08:00:00Z"
|
| 241 |
+
}
|
| 242 |
+
```
|
| 243 |
+
|
| 244 |
+
Status umum: `uploaded`, `processing`, `processed`, `failed`.
|
| 245 |
+
|
| 246 |
+
### `GET /api/v1/documents/doctypes`
|
| 247 |
+
|
| 248 |
+
Mengambil tipe dokumen yang didukung.
|
| 249 |
+
|
| 250 |
+
Success `200`:
|
| 251 |
+
|
| 252 |
+
```json
|
| 253 |
+
{
|
| 254 |
+
"status": "success",
|
| 255 |
+
"message": "supported document types",
|
| 256 |
+
"data": [
|
| 257 |
+
{
|
| 258 |
+
"type": "pdf",
|
| 259 |
+
"max_size_mb": 10,
|
| 260 |
+
"status": "active",
|
| 261 |
+
"message": null
|
| 262 |
+
}
|
| 263 |
+
]
|
| 264 |
+
}
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
### `POST /api/v1/document/upload`
|
| 268 |
+
|
| 269 |
+
Upload dokumen ke Azure Blob Storage. Maksimum 10 MB. Mendukung `pdf`, `docx`, `txt`, `csv`, dan `xlsx`.
|
| 270 |
+
|
| 271 |
+
Content-Type: `multipart/form-data`
|
| 272 |
+
|
| 273 |
+
Form fields:
|
| 274 |
+
|
| 275 |
+
| Field | Required | Keterangan |
|
| 276 |
+
| --- | --- | --- |
|
| 277 |
+
| `user_id` | Yes | Harus sama dengan user dari token |
|
| 278 |
+
| `file` | Yes | File dokumen |
|
| 279 |
+
|
| 280 |
+
Success `201`: `data` berisi Document Model.
|
| 281 |
+
|
| 282 |
+
Errors: `400`, `401`, `403`, `429`, `500`.
|
| 283 |
+
|
| 284 |
+
### `POST /api/v1/document/upload-local`
|
| 285 |
+
|
| 286 |
+
Upload file ke filesystem lokal untuk benchmarking. Kontrak form sama dengan upload Azure.
|
| 287 |
+
|
| 288 |
+
Success `201`:
|
| 289 |
+
|
| 290 |
+
```json
|
| 291 |
+
{
|
| 292 |
+
"status": "success",
|
| 293 |
+
"message": "file saved locally",
|
| 294 |
+
"data": {
|
| 295 |
+
"path": "files/user-id/sales.csv"
|
| 296 |
+
}
|
| 297 |
+
}
|
| 298 |
+
```
|
| 299 |
+
|
| 300 |
+
### `POST /api/v1/document/process`
|
| 301 |
+
|
| 302 |
+
Memulai proses dokumen secara async. Untuk dokumen unstructured, service melakukan extract text dan embedding jika tersedia. Untuk dokumen tabular, service membuat parquet/catalog source.
|
| 303 |
+
|
| 304 |
+
Request:
|
| 305 |
+
|
| 306 |
+
```json
|
| 307 |
+
{
|
| 308 |
+
"document_id": "document-id",
|
| 309 |
+
"user_id": "user-id"
|
| 310 |
+
}
|
| 311 |
+
```
|
| 312 |
+
|
| 313 |
+
Success `202`:
|
| 314 |
+
|
| 315 |
+
```json
|
| 316 |
+
{
|
| 317 |
+
"status": "success",
|
| 318 |
+
"message": "document processing started",
|
| 319 |
+
"data": {
|
| 320 |
+
"document_id": "document-id",
|
| 321 |
+
"file_type": "csv",
|
| 322 |
+
"status": "processing"
|
| 323 |
+
}
|
| 324 |
+
}
|
| 325 |
+
```
|
| 326 |
+
|
| 327 |
+
Pantau hasilnya lewat `GET /api/v1/documents/{user_id}`.
|
| 328 |
+
|
| 329 |
+
Errors: `400`, `401`, `403`, `404`, `500`.
|
| 330 |
+
|
| 331 |
+
### `GET /api/v1/documents/{user_id}`
|
| 332 |
+
|
| 333 |
+
List dokumen milik user.
|
| 334 |
+
|
| 335 |
+
Success `200`:
|
| 336 |
+
|
| 337 |
+
```json
|
| 338 |
+
{
|
| 339 |
+
"status": "success",
|
| 340 |
+
"message": "documents",
|
| 341 |
+
"data": []
|
| 342 |
+
}
|
| 343 |
+
```
|
| 344 |
+
|
| 345 |
+
Errors: `401`, `403`, `500`.
|
| 346 |
+
|
| 347 |
+
### `DELETE /api/v1/document/delete`
|
| 348 |
+
|
| 349 |
+
Menghapus dokumen dari storage, embedding/parquet terkait, dan record database.
|
| 350 |
+
|
| 351 |
+
Request:
|
| 352 |
+
|
| 353 |
+
```json
|
| 354 |
+
{
|
| 355 |
+
"document_id": "document-id",
|
| 356 |
+
"user_id": "user-id"
|
| 357 |
+
}
|
| 358 |
+
```
|
| 359 |
+
|
| 360 |
+
Success `200`:
|
| 361 |
+
|
| 362 |
+
```json
|
| 363 |
+
{
|
| 364 |
+
"status": "success",
|
| 365 |
+
"message": "document deleted"
|
| 366 |
+
}
|
| 367 |
+
```
|
| 368 |
+
|
| 369 |
+
Errors: `400`, `401`, `403`, `404`, `500`.
|
| 370 |
+
|
| 371 |
+
## Database Clients
|
| 372 |
+
|
| 373 |
+
### DB Client Model
|
| 374 |
+
|
| 375 |
+
```json
|
| 376 |
+
{
|
| 377 |
+
"id": "client-id",
|
| 378 |
+
"user_id": "user-id",
|
| 379 |
+
"name": "Analytics Warehouse",
|
| 380 |
+
"db_type": "postgres",
|
| 381 |
+
"status": "active",
|
| 382 |
+
"created_at": "2026-06-30T08:00:00Z",
|
| 383 |
+
"updated_at": "2026-06-30T08:00:00Z"
|
| 384 |
+
}
|
| 385 |
+
```
|
| 386 |
+
|
| 387 |
+
### `GET /api/v1/database-clients/dbtypes`
|
| 388 |
+
|
| 389 |
+
Mengambil tipe database dan daftar field credential untuk render form dinamis. Saat ini `postgres` aktif; tipe lain dapat muncul sebagai `inactive`.
|
| 390 |
+
|
| 391 |
+
### `POST /api/v1/database-clients`
|
| 392 |
+
|
| 393 |
+
Menyimpan koneksi database. Credentials disimpan terenkripsi. Jika koneksi dengan identity yang sama sudah ada, response `200` mengembalikan client existing.
|
| 394 |
+
|
| 395 |
+
Request:
|
| 396 |
+
|
| 397 |
+
```json
|
| 398 |
+
{
|
| 399 |
+
"user_id": "user-id",
|
| 400 |
+
"name": "Analytics Warehouse",
|
| 401 |
+
"db_type": "postgres",
|
| 402 |
+
"credentials": {
|
| 403 |
+
"host": "db.example.com",
|
| 404 |
+
"port": 5432,
|
| 405 |
+
"database": "analytics",
|
| 406 |
+
"username": "db_user",
|
| 407 |
+
"password": "db_password",
|
| 408 |
+
"ssl_mode": "require"
|
| 409 |
+
}
|
| 410 |
+
}
|
| 411 |
+
```
|
| 412 |
+
|
| 413 |
+
Success:
|
| 414 |
+
|
| 415 |
+
- `201`: database client created
|
| 416 |
+
- `200`: database client already exists
|
| 417 |
+
|
| 418 |
+
Errors: `400`, `401`, `403`, `429`, `500`.
|
| 419 |
+
|
| 420 |
+
### `GET /api/v1/database-clients/{user_id}`
|
| 421 |
+
|
| 422 |
+
List koneksi database milik user.
|
| 423 |
+
|
| 424 |
+
Success `200`:
|
| 425 |
+
|
| 426 |
+
```json
|
| 427 |
+
{
|
| 428 |
+
"status": "success",
|
| 429 |
+
"message": "database clients",
|
| 430 |
+
"data": []
|
| 431 |
+
}
|
| 432 |
+
```
|
| 433 |
+
|
| 434 |
+
Errors: `401`, `403`, `500`.
|
| 435 |
+
|
| 436 |
+
### `GET /api/v1/database-clients/{user_id}/{client_id}`
|
| 437 |
+
|
| 438 |
+
Ambil detail satu koneksi database. Success `200` dengan `data` berisi DB Client Model.
|
| 439 |
+
|
| 440 |
+
Errors: `401`, `403`, `404`, `500`.
|
| 441 |
+
|
| 442 |
+
### `PUT /api/v1/database-clients/{client_id}?user_id={user_id}`
|
| 443 |
+
|
| 444 |
+
Update nama, credentials, atau status koneksi. Semua field body optional, tetapi body harus JSON valid.
|
| 445 |
+
|
| 446 |
+
Request:
|
| 447 |
+
|
| 448 |
+
```json
|
| 449 |
+
{
|
| 450 |
+
"name": "Updated Warehouse",
|
| 451 |
+
"credentials": {
|
| 452 |
+
"host": "db.example.com",
|
| 453 |
+
"port": 5432,
|
| 454 |
+
"database": "analytics",
|
| 455 |
+
"username": "db_user",
|
| 456 |
+
"password": "new_password",
|
| 457 |
+
"ssl_mode": "require"
|
| 458 |
+
},
|
| 459 |
+
"status": "active"
|
| 460 |
+
}
|
| 461 |
+
```
|
| 462 |
+
|
| 463 |
+
Success `200`: `data` berisi DB Client Model.
|
| 464 |
+
|
| 465 |
+
Errors: `400`, `401`, `403`, `404`, `500`.
|
| 466 |
+
|
| 467 |
+
### `DELETE /api/v1/database-clients/{client_id}?user_id={user_id}`
|
| 468 |
+
|
| 469 |
+
Menghapus koneksi database dan memicu pembersihan catalog.
|
| 470 |
+
|
| 471 |
+
Success `200`:
|
| 472 |
+
|
| 473 |
+
```json
|
| 474 |
+
{
|
| 475 |
+
"status": "success",
|
| 476 |
+
"message": "database client deleted"
|
| 477 |
+
}
|
| 478 |
+
```
|
| 479 |
+
|
| 480 |
+
Errors: `400`, `401`, `403`, `404`, `500`.
|
| 481 |
+
|
| 482 |
+
### `POST /api/v1/database-clients/{client_id}/ingest?user_id={user_id}`
|
| 483 |
+
|
| 484 |
+
Melakukan introspection schema database dan menyimpan hasilnya ke catalog. Tidak membutuhkan request body.
|
| 485 |
+
|
| 486 |
+
Success `200`:
|
| 487 |
+
|
| 488 |
+
```json
|
| 489 |
+
{
|
| 490 |
+
"status": "success",
|
| 491 |
+
"message": "schema ingested",
|
| 492 |
+
"data": {
|
| 493 |
+
"tables": []
|
| 494 |
+
}
|
| 495 |
+
}
|
| 496 |
+
```
|
| 497 |
+
|
| 498 |
+
Errors: `400`, `401`, `403`, `404`, `409`, `429`, `500`.
|
| 499 |
+
|
| 500 |
+
## Data Catalog
|
| 501 |
+
|
| 502 |
+
### `POST /api/v1/data-catalog/rebuild`
|
| 503 |
+
|
| 504 |
+
Rebuild seluruh catalog user dari dokumen tabular dan database client aktif.
|
| 505 |
+
|
| 506 |
+
Request:
|
| 507 |
+
|
| 508 |
+
```json
|
| 509 |
+
{
|
| 510 |
+
"user_id": "user-id"
|
| 511 |
+
}
|
| 512 |
+
```
|
| 513 |
+
|
| 514 |
+
Success `200`:
|
| 515 |
+
|
| 516 |
+
```json
|
| 517 |
+
{
|
| 518 |
+
"status": "success",
|
| 519 |
+
"message": "catalog rebuilt",
|
| 520 |
+
"data": {
|
| 521 |
+
"user_id": "user-id",
|
| 522 |
+
"schema_version": "1.0",
|
| 523 |
+
"generated_at": "2026-06-30T08:00:00Z",
|
| 524 |
+
"sources": []
|
| 525 |
+
}
|
| 526 |
+
}
|
| 527 |
+
```
|
| 528 |
+
|
| 529 |
+
Errors: `400`, `401`, `403`, `429`, `500`.
|
| 530 |
+
|
| 531 |
+
### `GET /api/v1/data-catalog/{user_id}`
|
| 532 |
+
|
| 533 |
+
Mengambil index catalog user. Response `sources` berisi ringkasan source, tanpa detail table penuh.
|
| 534 |
+
|
| 535 |
+
Success `200`:
|
| 536 |
+
|
| 537 |
+
```json
|
| 538 |
+
{
|
| 539 |
+
"status": "success",
|
| 540 |
+
"message": "data catalog",
|
| 541 |
+
"data": {
|
| 542 |
+
"user_id": "user-id",
|
| 543 |
+
"schema_version": "1.0",
|
| 544 |
+
"generated_at": "2026-06-30T08:00:00Z",
|
| 545 |
+
"sources": [
|
| 546 |
+
{
|
| 547 |
+
"source_id": "document-or-client-id",
|
| 548 |
+
"source_type": "tabular",
|
| 549 |
+
"name": "sales.csv",
|
| 550 |
+
"location_ref": "blob/path/or/db-ref",
|
| 551 |
+
"table_count": 1,
|
| 552 |
+
"updated_at": "2026-06-30T08:00:00Z"
|
| 553 |
+
}
|
| 554 |
+
]
|
| 555 |
+
}
|
| 556 |
+
}
|
| 557 |
+
```
|
| 558 |
+
|
| 559 |
+
Errors: `401`, `403`, `500`.
|
| 560 |
+
|
| 561 |
+
### `POST /api/v1/analyses/{id}/data-catalog/rebuild`
|
| 562 |
+
|
| 563 |
+
Membangun ulang catalog khusus analysis berdasarkan `data_bind` terbaru milik analysis tersebut. Endpoint ini hanya memakai source yang ter-bind ke analysis, bukan semua knowledge source user.
|
| 564 |
+
|
| 565 |
+
Gunakan endpoint ini setelah perubahan binding jika frontend ingin memicu rebuild secara eksplisit. `PUT /api/v1/analyses/{id}/data-bind` juga melakukan rebuild analysis catalog sebagai bagian dari update binding.
|
| 566 |
+
|
| 567 |
+
Tidak membutuhkan request body.
|
| 568 |
+
|
| 569 |
+
Success `200`:
|
| 570 |
+
|
| 571 |
+
```json
|
| 572 |
+
{
|
| 573 |
+
"status": "success",
|
| 574 |
+
"message": "analysis catalog rebuilt",
|
| 575 |
+
"data": {
|
| 576 |
+
"user_id": "user-id",
|
| 577 |
+
"schema_version": "1.0",
|
| 578 |
+
"generated_at": "2026-06-30T08:00:00Z",
|
| 579 |
+
"sources": []
|
| 580 |
+
}
|
| 581 |
+
}
|
| 582 |
+
```
|
| 583 |
+
|
| 584 |
+
Errors: `400`, `401`, `404`, `500`.
|
| 585 |
+
|
| 586 |
+
### `GET /api/v1/analyses/{id}/data-catalog`
|
| 587 |
+
|
| 588 |
+
Mengambil catalog yang scoped ke analysis dan mengikuti `data_bind` analysis, bukan seluruh catalog user.
|
| 589 |
+
|
| 590 |
+
Success `200`:
|
| 591 |
+
|
| 592 |
+
```json
|
| 593 |
+
{
|
| 594 |
+
"status": "success",
|
| 595 |
+
"message": "analysis data catalog",
|
| 596 |
+
"data": {
|
| 597 |
+
"user_id": "user-id",
|
| 598 |
+
"schema_version": "1.0",
|
| 599 |
+
"generated_at": "2026-06-30T08:00:00Z",
|
| 600 |
+
"sources": []
|
| 601 |
+
}
|
| 602 |
+
}
|
| 603 |
+
```
|
| 604 |
+
|
| 605 |
+
Errors: `401`, `404`.
|
| 606 |
+
|
| 607 |
+
## Analyses
|
| 608 |
+
|
| 609 |
+
### Data Bind Item
|
| 610 |
+
|
| 611 |
+
`data_bind` adalah daftar source yang dipilih user untuk analysis.
|
| 612 |
+
|
| 613 |
+
```json
|
| 614 |
+
{
|
| 615 |
+
"id": "source-id",
|
| 616 |
+
"name": "sales.csv",
|
| 617 |
+
"group_type": "document",
|
| 618 |
+
"type": "csv"
|
| 619 |
+
}
|
| 620 |
+
```
|
| 621 |
+
|
| 622 |
+
Field:
|
| 623 |
+
|
| 624 |
+
| Field | Required | Keterangan |
|
| 625 |
+
| --- | --- | --- |
|
| 626 |
+
| `id` | Yes | `document.id` atau `database_client.id` |
|
| 627 |
+
| `name` | Yes | Nama yang ditampilkan di UI |
|
| 628 |
+
| `group_type` | Yes | `document` atau `database` |
|
| 629 |
+
| `type` | Yes | File type (`csv`, `pdf`, `xlsx`) atau database type (`postgres`) |
|
| 630 |
+
|
| 631 |
+
Rules:
|
| 632 |
+
|
| 633 |
+
- `data_bind` wajib berisi minimal satu source.
|
| 634 |
+
- Semua source harus milik user yang sedang login.
|
| 635 |
+
- Duplicate source dalam satu `data_bind` ditolak.
|
| 636 |
+
|
| 637 |
+
### Analysis Model
|
| 638 |
+
|
| 639 |
+
```json
|
| 640 |
+
{
|
| 641 |
+
"id": "analysis-id",
|
| 642 |
+
"user_id": "user-id",
|
| 643 |
+
"analysis_title": "Q3 Revenue Analysis",
|
| 644 |
+
"objective": "Find revenue movement and root cause",
|
| 645 |
+
"business_questions": [
|
| 646 |
+
"Why did revenue drop in Q3?",
|
| 647 |
+
"Which customer segment contributed the most to the change?"
|
| 648 |
+
],
|
| 649 |
+
"status": "active",
|
| 650 |
+
"data_bind": [
|
| 651 |
+
{
|
| 652 |
+
"id": "document-id",
|
| 653 |
+
"name": "sales.csv",
|
| 654 |
+
"group_type": "document",
|
| 655 |
+
"type": "csv"
|
| 656 |
+
}
|
| 657 |
+
],
|
| 658 |
+
"data_bind_version": 1,
|
| 659 |
+
"report_collection": [],
|
| 660 |
+
"created_at": "2026-06-30T08:00:00Z",
|
| 661 |
+
"updated_at": "2026-06-30T08:00:00Z"
|
| 662 |
+
}
|
| 663 |
+
```
|
| 664 |
+
|
| 665 |
+
### `POST /api/v1/analyses`
|
| 666 |
+
|
| 667 |
+
Membuat analysis aktif dengan source binding awal.
|
| 668 |
+
|
| 669 |
+
Request:
|
| 670 |
+
|
| 671 |
+
```json
|
| 672 |
+
{
|
| 673 |
+
"analysis_title": "Q3 Revenue Analysis",
|
| 674 |
+
"objective": "Find revenue movement and root cause",
|
| 675 |
+
"business_questions": [
|
| 676 |
+
"Why did revenue drop in Q3?",
|
| 677 |
+
"Which customer segment contributed the most to the change?"
|
| 678 |
+
],
|
| 679 |
+
"data_bind": [
|
| 680 |
+
{
|
| 681 |
+
"id": "document-id",
|
| 682 |
+
"name": "sales.csv",
|
| 683 |
+
"group_type": "document",
|
| 684 |
+
"type": "csv"
|
| 685 |
+
},
|
| 686 |
+
{
|
| 687 |
+
"id": "database-client-id",
|
| 688 |
+
"name": "Analytics Warehouse",
|
| 689 |
+
"group_type": "database",
|
| 690 |
+
"type": "postgres"
|
| 691 |
+
}
|
| 692 |
+
]
|
| 693 |
+
}
|
| 694 |
+
```
|
| 695 |
+
|
| 696 |
+
Success `201`: `data` berisi Analysis Model.
|
| 697 |
+
|
| 698 |
+
Validation:
|
| 699 |
+
|
| 700 |
+
- `business_questions` wajib berisi minimal satu string non-empty.
|
| 701 |
+
- `data_bind` wajib berisi minimal satu source.
|
| 702 |
+
|
| 703 |
+
Errors: `400`, `401`, `409`.
|
| 704 |
+
|
| 705 |
+
### `GET /api/v1/analyses`
|
| 706 |
+
|
| 707 |
+
List analysis milik user.
|
| 708 |
+
|
| 709 |
+
Query:
|
| 710 |
+
|
| 711 |
+
| Query | Default | Keterangan |
|
| 712 |
+
| --- | --- | --- |
|
| 713 |
+
| `status` | `active` | `active` atau `inactive` |
|
| 714 |
+
| `page` | `1` | Nomor halaman |
|
| 715 |
+
| `limit` | `20` | Maksimum `100` |
|
| 716 |
+
|
| 717 |
+
Success `200`:
|
| 718 |
+
|
| 719 |
+
```json
|
| 720 |
+
{
|
| 721 |
+
"status": "success",
|
| 722 |
+
"message": "Analyses retrieved",
|
| 723 |
+
"data": {
|
| 724 |
+
"analyses": [],
|
| 725 |
+
"pagination": {
|
| 726 |
+
"page": 1,
|
| 727 |
+
"limit": 20
|
| 728 |
+
}
|
| 729 |
+
}
|
| 730 |
+
}
|
| 731 |
+
```
|
| 732 |
+
|
| 733 |
+
Errors: `401`, `500`.
|
| 734 |
+
|
| 735 |
+
### `GET /api/v1/analyses/{id}`
|
| 736 |
+
|
| 737 |
+
Ambil detail analysis milik user. Success `200` dengan `data` berisi Analysis Model.
|
| 738 |
+
|
| 739 |
+
Errors: `400`, `401`, `404`.
|
| 740 |
+
|
| 741 |
+
### `PATCH /api/v1/analyses/{id}`
|
| 742 |
+
|
| 743 |
+
Update metadata atau status analysis. Semua field pada request body bersifat optional.
|
| 744 |
+
|
| 745 |
+
Request:
|
| 746 |
+
|
| 747 |
+
```json
|
| 748 |
+
{
|
| 749 |
+
"analysis_title": "Updated title",
|
| 750 |
+
"objective": "Updated objective",
|
| 751 |
+
"status": "inactive"
|
| 752 |
+
}
|
| 753 |
+
```
|
| 754 |
+
|
| 755 |
+
`status` hanya `active` atau `inactive`.
|
| 756 |
+
|
| 757 |
+
Success `200`: `data` berisi Analysis Model.
|
| 758 |
+
|
| 759 |
+
Errors: `400`, `401`, `404`.
|
| 760 |
+
|
| 761 |
+
### `DELETE /api/v1/analyses/{id}`
|
| 762 |
+
|
| 763 |
+
Hapus analysis milik user.
|
| 764 |
+
|
| 765 |
+
Success `204` tanpa response body.
|
| 766 |
+
|
| 767 |
+
Errors: `400`, `401`, `404`.
|
| 768 |
+
|
| 769 |
+
### `PUT /api/v1/analyses/{id}/data-bind`
|
| 770 |
+
|
| 771 |
+
Mengganti daftar source yang ter-bind ke analysis secara atomic dengan optimistic version check. Jika update berhasil, service juga rebuild catalog scope analysis dari `data_bind` terbaru. Jika rebuild catalog gagal, perubahan binding ditolak/rollback.
|
| 772 |
+
|
| 773 |
+
Request:
|
| 774 |
+
|
| 775 |
+
```json
|
| 776 |
+
{
|
| 777 |
+
"expected_version": 1,
|
| 778 |
+
"data_bind": [
|
| 779 |
+
{
|
| 780 |
+
"id": "new-document-id",
|
| 781 |
+
"name": "updated-sales.csv",
|
| 782 |
+
"group_type": "document",
|
| 783 |
+
"type": "csv"
|
| 784 |
+
}
|
| 785 |
+
]
|
| 786 |
+
}
|
| 787 |
+
```
|
| 788 |
+
|
| 789 |
+
Success `200`: `data` berisi Analysis Model dengan `data_bind_version` yang sudah naik.
|
| 790 |
+
|
| 791 |
+
Errors:
|
| 792 |
+
|
| 793 |
+
- `400`: payload invalid, empty binding, source invalid
|
| 794 |
+
- `401`: token invalid/missing
|
| 795 |
+
- `409`: stale `expected_version`, inactive analysis, atau limit violation
|
| 796 |
+
|
| 797 |
+
## Analysis Messages
|
| 798 |
+
|
| 799 |
+
### Message Model
|
| 800 |
+
|
| 801 |
+
```json
|
| 802 |
+
{
|
| 803 |
+
"id": "message-id",
|
| 804 |
+
"analysis_id": "analysis-id",
|
| 805 |
+
"user_id": "user-id",
|
| 806 |
+
"role": "user",
|
| 807 |
+
"content": "Apa penyebab revenue turun di Q3?",
|
| 808 |
+
"created_at": "2026-06-30T08:00:00Z"
|
| 809 |
+
}
|
| 810 |
+
```
|
| 811 |
+
|
| 812 |
+
`role` hanya:
|
| 813 |
+
|
| 814 |
+
- `user`: pertanyaan atau instruksi dari user
|
| 815 |
+
- `ai`: jawaban dari AI Agent Service
|
| 816 |
+
|
| 817 |
+
### `POST /api/v1/analyses/{id}/messages`
|
| 818 |
+
|
| 819 |
+
Merekam tepat satu pesan conversation ke `analyses_messages`.
|
| 820 |
+
|
| 821 |
+
Request untuk pertanyaan user:
|
| 822 |
+
|
| 823 |
+
```json
|
| 824 |
+
{
|
| 825 |
+
"role": "user",
|
| 826 |
+
"content": "Apa penyebab revenue turun di Q3?"
|
| 827 |
+
}
|
| 828 |
+
```
|
| 829 |
+
|
| 830 |
+
Request untuk jawaban AI:
|
| 831 |
+
|
| 832 |
+
```json
|
| 833 |
+
{
|
| 834 |
+
"role": "ai",
|
| 835 |
+
"content": "Revenue turun karena penurunan transaksi enterprise dan kenaikan churn di wilayah barat."
|
| 836 |
+
}
|
| 837 |
+
```
|
| 838 |
+
|
| 839 |
+
Success `201`:
|
| 840 |
+
|
| 841 |
+
```json
|
| 842 |
+
{
|
| 843 |
+
"status": "success",
|
| 844 |
+
"message": "Message created",
|
| 845 |
+
"data": {
|
| 846 |
+
"message": {
|
| 847 |
+
"id": "message-id",
|
| 848 |
+
"analysis_id": "analysis-id",
|
| 849 |
+
"user_id": "user-id",
|
| 850 |
+
"role": "user",
|
| 851 |
+
"content": "Apa penyebab revenue turun di Q3?",
|
| 852 |
+
"created_at": "2026-06-30T08:00:00Z"
|
| 853 |
+
}
|
| 854 |
+
}
|
| 855 |
+
}
|
| 856 |
+
```
|
| 857 |
+
|
| 858 |
+
Errors:
|
| 859 |
+
|
| 860 |
+
- `400`: invalid role/content, invalid analysis ID
|
| 861 |
+
- `401`: token invalid/missing
|
| 862 |
+
- `409`: inactive analysis atau message limit tercapai
|
| 863 |
+
|
| 864 |
+
### `GET /api/v1/analyses/{id}/messages`
|
| 865 |
+
|
| 866 |
+
Mengambil riwayat conversation analysis.
|
| 867 |
+
|
| 868 |
+
Query:
|
| 869 |
+
|
| 870 |
+
| Query | Default | Keterangan |
|
| 871 |
+
| --- | --- | --- |
|
| 872 |
+
| `limit` | `100` | Maksimum `100` |
|
| 873 |
+
|
| 874 |
+
Success `200`:
|
| 875 |
+
|
| 876 |
+
```json
|
| 877 |
+
{
|
| 878 |
+
"status": "success",
|
| 879 |
+
"message": "Messages retrieved",
|
| 880 |
+
"data": {
|
| 881 |
+
"messages": []
|
| 882 |
+
}
|
| 883 |
+
}
|
| 884 |
+
```
|
| 885 |
+
|
| 886 |
+
Errors: `400`, `401`, `404`.
|
| 887 |
+
|
| 888 |
+
## Suggested Frontend Integration Sequence
|
| 889 |
+
|
| 890 |
+
### Login
|
| 891 |
+
|
| 892 |
+
```text
|
| 893 |
+
POST /api/login
|
| 894 |
+
store access_token, refresh_token, user.id
|
| 895 |
+
```
|
| 896 |
+
|
| 897 |
+
### Upload and Process File
|
| 898 |
+
|
| 899 |
+
```text
|
| 900 |
+
GET /api/v1/documents/doctypes
|
| 901 |
+
POST /api/v1/document/upload
|
| 902 |
+
POST /api/v1/document/process
|
| 903 |
+
GET /api/v1/documents/{user_id}
|
| 904 |
+
```
|
| 905 |
+
|
| 906 |
+
### Connect Database
|
| 907 |
+
|
| 908 |
+
```text
|
| 909 |
+
GET /api/v1/database-clients/dbtypes
|
| 910 |
+
POST /api/v1/database-clients
|
| 911 |
+
POST /api/v1/database-clients/{client_id}/ingest?user_id={user_id}
|
| 912 |
+
GET /api/v1/database-clients/{user_id}
|
| 913 |
+
```
|
| 914 |
+
|
| 915 |
+
### Generate Knowledge Catalog
|
| 916 |
+
|
| 917 |
+
```text
|
| 918 |
+
POST /api/v1/data-catalog/rebuild
|
| 919 |
+
GET /api/v1/data-catalog/{user_id}
|
| 920 |
+
```
|
| 921 |
+
|
| 922 |
+
### Create Analysis and Start Conversation
|
| 923 |
+
|
| 924 |
+
```text
|
| 925 |
+
POST /api/v1/analyses with business_questions
|
| 926 |
+
call AI Agent Service outside this service
|
| 927 |
+
POST /api/v1/analyses/{analysis_id}/messages with role=user and content=user_question
|
| 928 |
+
POST /api/v1/analyses/{analysis_id}/messages with role=ai and content=agent_answer
|
| 929 |
+
GET /api/v1/analyses/{analysis_id}/messages
|
| 930 |
+
```
|
| 931 |
+
|
| 932 |
+
## Important Frontend Notes
|
| 933 |
+
|
| 934 |
+
- Jangan mengirim pesan user ke `POST /api/v1/analyses/{id}/messages` dengan ekspektasi service ini akan menjawab. Endpoint ini hanya persistence.
|
| 935 |
+
- AI Agent Service adalah service terpisah. Service ini menyimpan metadata analysis, knowledge catalog, data binding, dan history conversation.
|
| 936 |
+
- User-level catalog berisi seluruh knowledge source user; analysis-level catalog hanya berisi source yang ada di `data_bind` analysis.
|
| 937 |
+
- Perubahan `data_bind` akan rebuild analysis-level catalog. Frontend juga dapat memanggil endpoint rebuild analysis catalog secara eksplisit jika diperlukan.
|
| 938 |
+
- Setelah refresh token sukses, selalu replace refresh token lama dengan refresh token baru.
|
| 939 |
+
- Untuk endpoint yang membutuhkan `user_id`, gunakan `data.user.id` dari login dan pastikan sama dengan token aktif.
|
| 940 |
+
- Untuk binding analysis, pakai source yang sudah berhasil diupload/diproses atau database client yang sudah diingest.
|
| 941 |
+
|
| 942 |
+
|
| 943 |
+
|
| 944 |
+
|
| 945 |
+
|
| 946 |
+
|
| 947 |
+
|
API_CONTRACT_BE_PYTHON.md
ADDED
|
@@ -0,0 +1,521 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Backend Agentic Service API Contract
|
| 2 |
+
|
| 3 |
+
This document describes the Python agentic backend used by the frontend for AI chat, help/report tools, and observability data shown alongside chat answers.
|
| 4 |
+
|
| 5 |
+
Endpoint paths in this document are relative URLs. Configure the frontend with the deployed Python service base URL.
|
| 6 |
+
|
| 7 |
+
## Overview
|
| 8 |
+
|
| 9 |
+
The Python backend owns the generative AI interaction surface:
|
| 10 |
+
|
| 11 |
+
1. Stream chat answers from the AI agent.
|
| 12 |
+
2. Execute tool-style actions for help and report generation.
|
| 13 |
+
3. Return report versions and report details.
|
| 14 |
+
4. Return observability/provenance for a completed assistant answer.
|
| 15 |
+
|
| 16 |
+
The frontend uses this service during the analysis conversation flow:
|
| 17 |
+
|
| 18 |
+
1. User sends a chat message.
|
| 19 |
+
2. Frontend calls `POST /api/v2/chat/stream` and renders the streamed answer.
|
| 20 |
+
3. When the stream emits `done`, frontend stores or reads the returned `message_id`.
|
| 21 |
+
4. Frontend calls `GET /api/v1/observability` for planning, tool calls, and source provenance.
|
| 22 |
+
5. Frontend calls `POST /api/v1/tools/help` for guided help and `POST /api/v1/tools/report` for report generation.
|
| 23 |
+
|
| 24 |
+
## Endpoint Summary
|
| 25 |
+
|
| 26 |
+
| Method | Path | Purpose |
|
| 27 |
+
| --- | --- | --- |
|
| 28 |
+
| `POST` | `/api/v2/chat/stream` | Stream an AI chat answer for one analysis conversation. |
|
| 29 |
+
| `GET` | `/api/v1/tools/list` | List available frontend tools. |
|
| 30 |
+
| `POST` | `/api/v1/tools/help` | Stream contextual help for the current analysis conversation. |
|
| 31 |
+
| `POST` | `/api/v1/tools/report` | Generate and persist a new report version. |
|
| 32 |
+
| `GET` | `/api/v1/tools/report/{analysis_id}` | List report versions for an analysis. |
|
| 33 |
+
| `GET` | `/api/v1/tools/report/{analysis_id}/{version}` | Retrieve one report version. |
|
| 34 |
+
| `GET` | `/api/v1/observability` | Retrieve provenance for one assistant answer. |
|
| 35 |
+
|
| 36 |
+
## Common Concepts
|
| 37 |
+
|
| 38 |
+
### Identifiers
|
| 39 |
+
|
| 40 |
+
- `user_id`: user identifier passed by the frontend.
|
| 41 |
+
- `analysis_id`: analysis conversation identifier.
|
| 42 |
+
- `message_id`: assistant answer identifier used to correlate chat streaming with observability.
|
| 43 |
+
|
| 44 |
+
### Server-Sent Events
|
| 45 |
+
|
| 46 |
+
Chat and help endpoints return `text/event-stream`.
|
| 47 |
+
|
| 48 |
+
Frontend should parse events by `event` name and `data` payload. Blank lines separate SSE events.
|
| 49 |
+
|
| 50 |
+
Common event types:
|
| 51 |
+
|
| 52 |
+
| Event | Data | Meaning |
|
| 53 |
+
| --- | --- | --- |
|
| 54 |
+
| `sources` | JSON array | Sources available early in the stream. May be empty. |
|
| 55 |
+
| `status` | text | Optional progress update for slower paths. |
|
| 56 |
+
| `chunk` | text | Answer text fragment. Concatenate chunks in order. |
|
| 57 |
+
| `done` | JSON object | Terminal success event. Includes `message_id`. |
|
| 58 |
+
| `error` | text | Terminal error event. Stream stops after this. |
|
| 59 |
+
|
| 60 |
+
The stream carries only the answer text plus the lightweight `sources` and `status` events listed above. Planning, tool call details, and full provenance are not streamed; fetch them from `GET /api/v1/observability` after the stream emits `done`.
|
| 61 |
+
|
| 62 |
+
## Chat
|
| 63 |
+
|
| 64 |
+
### `POST /api/v2/chat/stream`
|
| 65 |
+
|
| 66 |
+
Streams an AI answer for one user message in an analysis conversation.
|
| 67 |
+
|
| 68 |
+
Request body:
|
| 69 |
+
|
| 70 |
+
```json
|
| 71 |
+
{
|
| 72 |
+
"user_id": "u_1a2b3c",
|
| 73 |
+
"analysis_id": "an_42",
|
| 74 |
+
"message_id": "msg_88f1",
|
| 75 |
+
"message": "What were total sales by region last quarter?"
|
| 76 |
+
}
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
Fields:
|
| 80 |
+
|
| 81 |
+
| Field | Required | Description |
|
| 82 |
+
| --- | --- | --- |
|
| 83 |
+
| `user_id` | Yes | User identifier. |
|
| 84 |
+
| `analysis_id` | Yes | Analysis conversation identifier. |
|
| 85 |
+
| `message_id` | No | Assistant answer id for observability correlation. If omitted, Python returns one in `done`. |
|
| 86 |
+
| `message` | Yes | User message text. |
|
| 87 |
+
|
| 88 |
+
Response: `text/event-stream`.
|
| 89 |
+
|
| 90 |
+
Example structured answer:
|
| 91 |
+
|
| 92 |
+
```text
|
| 93 |
+
event: sources
|
| 94 |
+
data: [{"document_id":"u_1a2b3c_orders","filename":"orders","page_label":null}]
|
| 95 |
+
|
| 96 |
+
event: status
|
| 97 |
+
data: Planning analysis...
|
| 98 |
+
|
| 99 |
+
event: status
|
| 100 |
+
data: Running 3 steps...
|
| 101 |
+
|
| 102 |
+
event: chunk
|
| 103 |
+
data: Total sales by region last quarter:
|
| 104 |
+
|
| 105 |
+
event: chunk
|
| 106 |
+
data: Central led at $1.21M (38%), East $0.74M, West $0.55M (down 12% QoQ).
|
| 107 |
+
|
| 108 |
+
event: done
|
| 109 |
+
data: {"message_id":"msg_88f1"}
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
Example simple chat answer:
|
| 113 |
+
|
| 114 |
+
```text
|
| 115 |
+
event: sources
|
| 116 |
+
data: []
|
| 117 |
+
|
| 118 |
+
event: chunk
|
| 119 |
+
data: I'm your AI data analyst. Connect a source or ask a question to get started.
|
| 120 |
+
|
| 121 |
+
event: done
|
| 122 |
+
data: {"message_id":"msg_12"}
|
| 123 |
+
```
|
| 124 |
+
|
| 125 |
+
Behavior notes:
|
| 126 |
+
|
| 127 |
+
- Greeting and farewell messages may use a fast canned path.
|
| 128 |
+
- Stateless `chat` intent may use a 1-hour Redis response cache.
|
| 129 |
+
- The router may classify messages into intents such as `chat`, `help`, `check`, `unstructured_flow`, or `structured_flow`.
|
| 130 |
+
- `sources` can be empty for chat/help/error paths.
|
| 131 |
+
- `status` events are optional; the frontend can safely ignore them.
|
| 132 |
+
|
| 133 |
+
## Tools
|
| 134 |
+
|
| 135 |
+
### `GET /api/v1/tools/list`
|
| 136 |
+
|
| 137 |
+
Returns the deterministic list of tools available to the frontend.
|
| 138 |
+
|
| 139 |
+
Request: none.
|
| 140 |
+
|
| 141 |
+
Response `200`:
|
| 142 |
+
|
| 143 |
+
```json
|
| 144 |
+
{
|
| 145 |
+
"count": 2,
|
| 146 |
+
"tools": [
|
| 147 |
+
{
|
| 148 |
+
"command": "/help",
|
| 149 |
+
"name": "help",
|
| 150 |
+
"type": "skill",
|
| 151 |
+
"description": "Show what the assistant can do and guide your next step."
|
| 152 |
+
},
|
| 153 |
+
{
|
| 154 |
+
"command": "/report",
|
| 155 |
+
"name": "report",
|
| 156 |
+
"type": "skill",
|
| 157 |
+
"description": "Generate a versioned analysis report with background, EDA, key findings, and insights."
|
| 158 |
+
}
|
| 159 |
+
]
|
| 160 |
+
}
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
Tool item shape:
|
| 164 |
+
|
| 165 |
+
```json
|
| 166 |
+
{
|
| 167 |
+
"command": "/help",
|
| 168 |
+
"name": "help",
|
| 169 |
+
"type": "skill",
|
| 170 |
+
"description": "Show what the assistant can do and guide your next step."
|
| 171 |
+
}
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
Frontend behavior:
|
| 175 |
+
|
| 176 |
+
- Surface `/help` in the slash menu.
|
| 177 |
+
- Surface report generation as a button or explicit UI action.
|
| 178 |
+
|
| 179 |
+
### `POST /api/v1/tools/help`
|
| 180 |
+
|
| 181 |
+
Streams contextual guidance for the current analysis conversation.
|
| 182 |
+
|
| 183 |
+
Request body:
|
| 184 |
+
|
| 185 |
+
```json
|
| 186 |
+
{
|
| 187 |
+
"user_id": "u_1a2b3c",
|
| 188 |
+
"analysis_id": "an_42"
|
| 189 |
+
}
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
+
Response: `text/event-stream` using the same event shape as chat.
|
| 193 |
+
|
| 194 |
+
Help responses usually emit `sources: []` and no `status` pings.
|
| 195 |
+
|
| 196 |
+
Example:
|
| 197 |
+
|
| 198 |
+
```text
|
| 199 |
+
event: sources
|
| 200 |
+
data: []
|
| 201 |
+
|
| 202 |
+
event: chunk
|
| 203 |
+
data: Your goal is set. You can start exploring now. Try a question like "average order value by month", then I can generate a report.
|
| 204 |
+
|
| 205 |
+
event: done
|
| 206 |
+
data: {"message_id":"msg_h7"}
|
| 207 |
+
```
|
| 208 |
+
|
| 209 |
+
## Reports
|
| 210 |
+
|
| 211 |
+
### `POST /api/v1/tools/report`
|
| 212 |
+
|
| 213 |
+
Generates, persists, and returns a new report version for an analysis.
|
| 214 |
+
|
| 215 |
+
Query params:
|
| 216 |
+
|
| 217 |
+
| Query | Required | Description |
|
| 218 |
+
| --- | --- | --- |
|
| 219 |
+
| `analysis_id` | Yes | Analysis identifier. |
|
| 220 |
+
| `user_id` | Yes | User identifier. |
|
| 221 |
+
|
| 222 |
+
Example:
|
| 223 |
+
|
| 224 |
+
```text
|
| 225 |
+
POST /api/v1/tools/report?analysis_id=an_42&user_id=u_1a2b3c
|
| 226 |
+
```
|
| 227 |
+
|
| 228 |
+
Status codes:
|
| 229 |
+
|
| 230 |
+
| Status | Meaning |
|
| 231 |
+
| --- | --- |
|
| 232 |
+
| `201` | New report version generated. |
|
| 233 |
+
| `409` | Report floor/precondition not met. |
|
| 234 |
+
| `500` | Generation or persistence failed. |
|
| 235 |
+
|
| 236 |
+
Response `201`:
|
| 237 |
+
|
| 238 |
+
```json
|
| 239 |
+
{
|
| 240 |
+
"report_id": "8f3a2b1c9d4e4f6a8b0c1d2e3f4a5b6c",
|
| 241 |
+
"analysis_id": "an_42",
|
| 242 |
+
"user_id": "u_1a2b3c",
|
| 243 |
+
"version": 2,
|
| 244 |
+
"generated_at": "2026-06-30T09:14:33.512Z",
|
| 245 |
+
"problem_statement": {
|
| 246 |
+
"objective": "Understand which regions drive revenue and why Q1 dipped.",
|
| 247 |
+
"business_questions": [
|
| 248 |
+
"Which regions contribute most to total revenue?",
|
| 249 |
+
"Did any region decline quarter-over-quarter?"
|
| 250 |
+
]
|
| 251 |
+
},
|
| 252 |
+
"record_ids": ["rec_a1", "rec_b2"],
|
| 253 |
+
"executive_summary": "Revenue is concentrated in the Central region (38% of total). The West was the only region to contract, down 12% QoQ, the main driver of the Q1 dip.",
|
| 254 |
+
"findings": [
|
| 255 |
+
{
|
| 256 |
+
"text": "Central region contributed 38% of total revenue, the largest share.",
|
| 257 |
+
"record_ids": ["rec_a1"],
|
| 258 |
+
"supporting_data": null
|
| 259 |
+
},
|
| 260 |
+
{
|
| 261 |
+
"text": "West region revenue fell 12% quarter-over-quarter.",
|
| 262 |
+
"record_ids": ["rec_b2"],
|
| 263 |
+
"supporting_data": null
|
| 264 |
+
}
|
| 265 |
+
],
|
| 266 |
+
"caveats": [
|
| 267 |
+
{
|
| 268 |
+
"text": "March data for the East region was partially missing, around 6% of rows.",
|
| 269 |
+
"record_ids": ["rec_b2"]
|
| 270 |
+
}
|
| 271 |
+
],
|
| 272 |
+
"open_questions": [
|
| 273 |
+
{
|
| 274 |
+
"text": "What drove the West region's QoQ decline?",
|
| 275 |
+
"record_ids": ["rec_b2"]
|
| 276 |
+
}
|
| 277 |
+
],
|
| 278 |
+
"data_sources": [
|
| 279 |
+
{
|
| 280 |
+
"source_id": "src_sales_db",
|
| 281 |
+
"name": "orders",
|
| 282 |
+
"source_type": "postgres",
|
| 283 |
+
"detail": {
|
| 284 |
+
"tables": ["orders"],
|
| 285 |
+
"row_count": 48213,
|
| 286 |
+
"columns": ["region", "amount", "ordered_at"]
|
| 287 |
+
}
|
| 288 |
+
}
|
| 289 |
+
],
|
| 290 |
+
"method_steps": [
|
| 291 |
+
{
|
| 292 |
+
"task_id": "t1",
|
| 293 |
+
"stage": "data_understanding",
|
| 294 |
+
"objective": "Inventory the sales source",
|
| 295 |
+
"status": "success",
|
| 296 |
+
"tools_used": ["check_data"]
|
| 297 |
+
},
|
| 298 |
+
{
|
| 299 |
+
"task_id": "t2",
|
| 300 |
+
"stage": "modeling",
|
| 301 |
+
"objective": "Aggregate revenue by region",
|
| 302 |
+
"status": "success",
|
| 303 |
+
"tools_used": ["analyze_aggregate"]
|
| 304 |
+
}
|
| 305 |
+
],
|
| 306 |
+
"rendered_markdown": "# Analysis Report\n\n*Generated 2026-06-30 by u_1a2b3c*\n\n## Objective\nUnderstand which regions drive revenue..."
|
| 307 |
+
}
|
| 308 |
+
```
|
| 309 |
+
|
| 310 |
+
Response `409`:
|
| 311 |
+
|
| 312 |
+
```json
|
| 313 |
+
{
|
| 314 |
+
"detail": "Not ready to generate a report - still needs at least one completed analysis."
|
| 315 |
+
}
|
| 316 |
+
```
|
| 317 |
+
|
| 318 |
+
Precondition:
|
| 319 |
+
|
| 320 |
+
- Reports require at least one completed analysis record for the session.
|
| 321 |
+
- If slow-path analysis recording is disabled, report generation can return `409` by design.
|
| 322 |
+
|
| 323 |
+
### `GET /api/v1/tools/report/{analysis_id}`
|
| 324 |
+
|
| 325 |
+
Lists report versions for one analysis, oldest first.
|
| 326 |
+
|
| 327 |
+
Response `200`:
|
| 328 |
+
|
| 329 |
+
```json
|
| 330 |
+
[
|
| 331 |
+
{
|
| 332 |
+
"report_id": "1b2c3d4e",
|
| 333 |
+
"version": 1,
|
| 334 |
+
"generated_at": "2026-06-24T15:02:11Z",
|
| 335 |
+
"record_count": 1
|
| 336 |
+
},
|
| 337 |
+
{
|
| 338 |
+
"report_id": "8f3a2b1c",
|
| 339 |
+
"version": 2,
|
| 340 |
+
"generated_at": "2026-06-25T09:14:33Z",
|
| 341 |
+
"record_count": 2
|
| 342 |
+
}
|
| 343 |
+
]
|
| 344 |
+
```
|
| 345 |
+
|
| 346 |
+
If no reports exist, returns `[]`.
|
| 347 |
+
|
| 348 |
+
### `GET /api/v1/tools/report/{analysis_id}/{version}`
|
| 349 |
+
|
| 350 |
+
Returns one report version. Shape is the same as the `201` response from `POST /api/v1/tools/report`.
|
| 351 |
+
|
| 352 |
+
Response `404`:
|
| 353 |
+
|
| 354 |
+
```json
|
| 355 |
+
{
|
| 356 |
+
"detail": "No report v3 for analysis 'an_42'."
|
| 357 |
+
}
|
| 358 |
+
```
|
| 359 |
+
|
| 360 |
+
## Observability
|
| 361 |
+
|
| 362 |
+
### `GET /api/v1/observability`
|
| 363 |
+
|
| 364 |
+
Returns Responsible AI provenance for one assistant answer.
|
| 365 |
+
|
| 366 |
+
The frontend should call this after the chat/help stream emits `done`, using the `message_id` from the `done` event. If the row is not ready yet, the endpoint returns `404`; the frontend may either retry until it receives `200` or stop after a `404`, according to product behavior.
|
| 367 |
+
|
| 368 |
+
Query params:
|
| 369 |
+
|
| 370 |
+
| Query | Required | Description |
|
| 371 |
+
| --- | --- | --- |
|
| 372 |
+
| `analysis_id` | Yes | Analysis identifier. |
|
| 373 |
+
| `message_id` | Yes | Assistant answer identifier returned by the stream. |
|
| 374 |
+
|
| 375 |
+
Example:
|
| 376 |
+
|
| 377 |
+
```text
|
| 378 |
+
GET /api/v1/observability?analysis_id=an_42&message_id=msg_88f1
|
| 379 |
+
```
|
| 380 |
+
|
| 381 |
+
Field rules:
|
| 382 |
+
|
| 383 |
+
- `planning`: present only when the planner ran; otherwise `null`.
|
| 384 |
+
- `thinking`: optional reasoning summary; `null` if unavailable.
|
| 385 |
+
- `tool_calls`: every invoked tool with input, output, and status; empty for pure chat or greeting paths.
|
| 386 |
+
- `sources`: required for retrieval flows; empty for chat/help paths that do not reference data.
|
| 387 |
+
|
| 388 |
+
Response `200` for `structured_flow`:
|
| 389 |
+
|
| 390 |
+
```json
|
| 391 |
+
{
|
| 392 |
+
"analysis_id": "an_42",
|
| 393 |
+
"message_id": "msg_88f1",
|
| 394 |
+
"intent": "structured_flow",
|
| 395 |
+
"generated_at": "2026-06-30T03:21:09.114Z",
|
| 396 |
+
"planning": {
|
| 397 |
+
"goal_restated": "Find which regions drive revenue and why Q1 dipped.",
|
| 398 |
+
"assumptions": ["'last quarter' = Q1 2026"],
|
| 399 |
+
"steps": [
|
| 400 |
+
{
|
| 401 |
+
"step": 1,
|
| 402 |
+
"stage": "data_understanding",
|
| 403 |
+
"objective": "Inventory the sales source"
|
| 404 |
+
},
|
| 405 |
+
{
|
| 406 |
+
"step": 2,
|
| 407 |
+
"stage": "modeling",
|
| 408 |
+
"objective": "Aggregate revenue by region"
|
| 409 |
+
}
|
| 410 |
+
]
|
| 411 |
+
},
|
| 412 |
+
"thinking": "The question needs a per-region breakdown plus a cause, so I inventory the source, aggregate revenue by region, then compare quarters.",
|
| 413 |
+
"tool_calls": [
|
| 414 |
+
{
|
| 415 |
+
"order": 1,
|
| 416 |
+
"name": "check_data",
|
| 417 |
+
"input": { "source_hint": "structured" },
|
| 418 |
+
"output": { "kind": "table", "summary": "1 source, 1 table, 48,213 rows" },
|
| 419 |
+
"status": "success"
|
| 420 |
+
},
|
| 421 |
+
{
|
| 422 |
+
"order": 2,
|
| 423 |
+
"name": "retrieve_data",
|
| 424 |
+
"input": {
|
| 425 |
+
"source_id": "src_sales_db",
|
| 426 |
+
"table_id": "orders",
|
| 427 |
+
"select": ["region", "amount"],
|
| 428 |
+
"group_by": ["region"]
|
| 429 |
+
},
|
| 430 |
+
"output": {
|
| 431 |
+
"kind": "table",
|
| 432 |
+
"columns": ["region", "total"],
|
| 433 |
+
"row_count": 4,
|
| 434 |
+
"preview": [["Central", 1210000], ["East", 740000]]
|
| 435 |
+
},
|
| 436 |
+
"status": "success"
|
| 437 |
+
}
|
| 438 |
+
],
|
| 439 |
+
"sources": [
|
| 440 |
+
{
|
| 441 |
+
"type": "database",
|
| 442 |
+
"source_id": "src_sales_db",
|
| 443 |
+
"name": "orders",
|
| 444 |
+
"query": "SELECT region, SUM(amount) AS total FROM orders GROUP BY region",
|
| 445 |
+
"detail": {
|
| 446 |
+
"tables": ["orders"],
|
| 447 |
+
"row_count": 48213
|
| 448 |
+
}
|
| 449 |
+
}
|
| 450 |
+
]
|
| 451 |
+
}
|
| 452 |
+
```
|
| 453 |
+
|
| 454 |
+
Response `200` for `unstructured_flow`:
|
| 455 |
+
|
| 456 |
+
```json
|
| 457 |
+
{
|
| 458 |
+
"analysis_id": "an_42",
|
| 459 |
+
"message_id": "msg_55",
|
| 460 |
+
"intent": "unstructured_flow",
|
| 461 |
+
"generated_at": "2026-06-30T03:40:02.001Z",
|
| 462 |
+
"planning": null,
|
| 463 |
+
"thinking": null,
|
| 464 |
+
"tool_calls": [
|
| 465 |
+
{
|
| 466 |
+
"order": 1,
|
| 467 |
+
"name": "retrieve_knowledge",
|
| 468 |
+
"input": {
|
| 469 |
+
"query": "technology stack used in this project",
|
| 470 |
+
"top_k": 4
|
| 471 |
+
},
|
| 472 |
+
"output": {
|
| 473 |
+
"kind": "documents",
|
| 474 |
+
"row_count": 4
|
| 475 |
+
},
|
| 476 |
+
"status": "success"
|
| 477 |
+
}
|
| 478 |
+
],
|
| 479 |
+
"sources": [
|
| 480 |
+
{
|
| 481 |
+
"type": "document",
|
| 482 |
+
"document_id": "doc_7",
|
| 483 |
+
"filename": "tech_handbook.pdf",
|
| 484 |
+
"page_label": "12",
|
| 485 |
+
"query": "technology stack used in this project",
|
| 486 |
+
"snippet": "The backend is built on FastAPI with async SQLAlchemy...",
|
| 487 |
+
"score": 0.83
|
| 488 |
+
}
|
| 489 |
+
]
|
| 490 |
+
}
|
| 491 |
+
```
|
| 492 |
+
|
| 493 |
+
Response `200` for simple chat or greeting:
|
| 494 |
+
|
| 495 |
+
```json
|
| 496 |
+
{
|
| 497 |
+
"analysis_id": "an_42",
|
| 498 |
+
"message_id": "msg_12",
|
| 499 |
+
"intent": "chat",
|
| 500 |
+
"generated_at": "2026-06-30T03:05:00.000Z",
|
| 501 |
+
"planning": null,
|
| 502 |
+
"thinking": null,
|
| 503 |
+
"tool_calls": [],
|
| 504 |
+
"sources": []
|
| 505 |
+
}
|
| 506 |
+
```
|
| 507 |
+
|
| 508 |
+
Response `404`:
|
| 509 |
+
|
| 510 |
+
```json
|
| 511 |
+
{
|
| 512 |
+
"detail": "No observability for message 'msg_88f1' yet."
|
| 513 |
+
}
|
| 514 |
+
```
|
| 515 |
+
|
| 516 |
+
Frontend rendering guidance:
|
| 517 |
+
|
| 518 |
+
- Render observability separately from the streamed answer.
|
| 519 |
+
- Default state can be collapsed.
|
| 520 |
+
- Show planning, tool calls, and sources as separate sections.
|
| 521 |
+
- Treat `planning: null`, `tool_calls: []`, and `sources: []` as valid states.
|
API_ENDPOINTS.md
DELETED
|
@@ -1,373 +0,0 @@
|
|
| 1 |
-
# Data Eyond — Python Agentic Service: FE-Callable API (for Go integration)
|
| 2 |
-
|
| 3 |
-
**Audience:** Harry (Go gateway) wiring the FE → Go → Python surface.
|
| 4 |
-
**Scope:** the **4 FE-callable surfaces** the Python service exposes after the 2026-06-24 pivot
|
| 5 |
-
(DEV_PLAN decision #6). Everything else under `/api/v1` is internal / Phase-1 legacy / Go-owned —
|
| 6 |
-
see [§7](#7-not-fe-facing) and the full inventory in [§9](#9-appendix--complete-endpoint-inventory-all-registered-routes).
|
| 7 |
-
**Branch:** `pr/4` · **Snapshot:** 2026-06-25 · **Companion:** [REPO_STATUS.md](REPO_STATUS.md).
|
| 8 |
-
|
| 9 |
-
> Request flow is **FE → Go → Python**. The FE never calls Python directly except for chat
|
| 10 |
-
> streaming. Auth/JWT is terminated at the Go gateway; Python receives `user_id` / `room_id` as
|
| 11 |
-
> **trusted inputs** and does no auth of its own.
|
| 12 |
-
|
| 13 |
-
---
|
| 14 |
-
|
| 15 |
-
## 1. The 4 FE-callable surfaces
|
| 16 |
-
|
| 17 |
-
| # | Logical name | HTTP | How it's invoked |
|
| 18 |
-
|---|---|---|---|
|
| 19 |
-
| 1 | **`call_agent`** | `POST /api/v1/chat/stream` | The one streaming chat call. Router classifies + dispatches. |
|
| 20 |
-
| 2 | **`list_skills`** | `GET /api/v1/tools` | Static slash-command catalog for the FE "/" menu. Cacheable. |
|
| 21 |
-
| 3 | **skill: `help`** | *(via `call_agent`)* | **No dedicated endpoint** — the router resolves it to the `help` intent inside `/chat/stream`. |
|
| 22 |
-
| 4 | **skill: `report`** | `POST /api/v1/report` (+ 2 `GET`s) | Dedicated REST API. **Not** through `/chat/stream`. |
|
| 23 |
-
|
| 24 |
-
**Key consequence for Go:** the two catalog skills are invoked **differently**. `/help` goes through
|
| 25 |
-
`/chat/stream`; `/report` is a direct REST call to the Report API. The catalog's `name` field is the
|
| 26 |
-
internal route key (`help` = router intent; `report` = the Report API), not a uniform dispatch key.
|
| 27 |
-
|
| 28 |
-
**Conventions:**
|
| 29 |
-
- Base path: `/api/v1`.
|
| 30 |
-
- **`room_id == analysis_id`** — one chat room == one analysis session (#9). Callers pass `room_id`
|
| 31 |
-
to chat; it *is* the `analysis_id` used by the report API.
|
| 32 |
-
- Streaming uses **SSE** (`text/event-stream`, `sse-starlette`).
|
| 33 |
-
|
| 34 |
-
---
|
| 35 |
-
|
| 36 |
-
## 2. `call_agent` — `POST /api/v1/chat/stream`
|
| 37 |
-
|
| 38 |
-
The only FE→Python call in normal operation. Source: [chat.py:169](src/api/v1/chat.py:169).
|
| 39 |
-
|
| 40 |
-
**Request body** (`application/json`) — `ChatRequest`:
|
| 41 |
-
|
| 42 |
-
```json
|
| 43 |
-
{
|
| 44 |
-
"user_id": "u_1a2b3c",
|
| 45 |
-
"room_id": "room_42",
|
| 46 |
-
"message": "What were total sales by region last quarter?"
|
| 47 |
-
}
|
| 48 |
-
```
|
| 49 |
-
|
| 50 |
-
`room_id` is the analysis session id. No auth header (handled by Go).
|
| 51 |
-
|
| 52 |
-
**Response:** `text/event-stream`. Events arrive in this order:
|
| 53 |
-
|
| 54 |
-
| `event:` | `data:` payload | Notes |
|
| 55 |
-
|---|---|---|
|
| 56 |
-
| `sources` | JSON array of source refs | `{document_id, filename, page_label}`. Structured: one per executed table (`document_id = "{user_id}_{table}"`, `page_label = null`). Unstructured: deduped doc/page. `chat`/`help`/`error`: `[]`. |
|
| 57 |
-
| `status` | text | **Slow-path only** — progress pings ("Planning…", "Running N steps…"). Keeps the SSE alive; safe to surface or ignore. |
|
| 58 |
-
| `chunk` | text fragment | Concatenate in order to form the answer. |
|
| 59 |
-
| `done` | *(empty)* | End of stream. |
|
| 60 |
-
| `error` | text | Terminal error; stream stops after this. |
|
| 61 |
-
|
| 62 |
-
> The handler also emits an internal `intent` event — it is **consumed inside Python** (gates
|
| 63 |
-
> caching) and **not forwarded** to the client. Go/FE will never see it.
|
| 64 |
-
|
| 65 |
-
**Example — `structured_flow` answer** (raw SSE wire; blank line separates events). Source shape:
|
| 66 |
-
[chat_handler.py:607](src/agents/chat_handler.py:607).
|
| 67 |
-
|
| 68 |
-
```
|
| 69 |
-
event: sources
|
| 70 |
-
data: [{"document_id":"u_1a2b3c_orders","filename":"orders","page_label":null}]
|
| 71 |
-
|
| 72 |
-
event: status
|
| 73 |
-
data: Planning analysis…
|
| 74 |
-
|
| 75 |
-
event: status
|
| 76 |
-
data: Running 3 steps…
|
| 77 |
-
|
| 78 |
-
event: chunk
|
| 79 |
-
data: Total sales by region last quarter:
|
| 80 |
-
|
| 81 |
-
event: chunk
|
| 82 |
-
data: Central led at $1.21M (38%), East $0.74M, West $0.55M (down 12% QoQ).
|
| 83 |
-
|
| 84 |
-
event: done
|
| 85 |
-
data:
|
| 86 |
-
```
|
| 87 |
-
|
| 88 |
-
**Example — simple `chat` reply** (no status pings, empty sources):
|
| 89 |
-
|
| 90 |
-
```
|
| 91 |
-
event: sources
|
| 92 |
-
data: []
|
| 93 |
-
|
| 94 |
-
event: chunk
|
| 95 |
-
data: I'm your AI data analyst — connect a source or ask a question to get started.
|
| 96 |
-
|
| 97 |
-
event: done
|
| 98 |
-
data:
|
| 99 |
-
```
|
| 100 |
-
|
| 101 |
-
**Behavior worth knowing for integration:**
|
| 102 |
-
- **Redis response cache** (1h TTL) is applied to the stateless `chat` intent only; cached replies
|
| 103 |
-
replay as `sources`/`chunk`/`done`.
|
| 104 |
-
- **Greeting/farewell fast-path** returns a canned reply with no LLM call.
|
| 105 |
-
- The LLM **router** classifies every message into one of **5 intents** —
|
| 106 |
-
`chat` · `help` · `check` · `unstructured_flow` · `structured_flow` — and dispatches. Messages
|
| 107 |
-
persist (user + assistant) on `done`.
|
| 108 |
-
|
| 109 |
-
---
|
| 110 |
-
|
| 111 |
-
## 3. `list_skills` — `GET /api/v1/tools`
|
| 112 |
-
|
| 113 |
-
Static, deterministic, **safe for Go to cache**. Source: [tools.py:133](src/api/v1/tools.py:133).
|
| 114 |
-
|
| 115 |
-
**Request:** none (no params, no body).
|
| 116 |
-
|
| 117 |
-
**Response** `200` (`ListToolsResponse`):
|
| 118 |
-
|
| 119 |
-
```json
|
| 120 |
-
{
|
| 121 |
-
"count": 2,
|
| 122 |
-
"tools": [
|
| 123 |
-
{ "command": "/help", "name": "help", "type": "skill",
|
| 124 |
-
"description": "Show what the assistant can do and guide your next step." },
|
| 125 |
-
{ "command": "/report", "name": "report", "type": "skill",
|
| 126 |
-
"description": "Generate a versioned analysis report (background, EDA, key findings, insights)." }
|
| 127 |
-
]
|
| 128 |
-
}
|
| 129 |
-
```
|
| 130 |
-
|
| 131 |
-
`CommandResponse` = `{ command, name, type, description }`, `type ∈ {skill, analytics, data_access}`.
|
| 132 |
-
Post-KM-678 the catalog is **`/help` + `/report` only**; the `analyze_*`, `check_*`, `retrieve_*`
|
| 133 |
-
and retired `/problem-statement` entries are commented out (kept for restorability), not deleted.
|
| 134 |
-
|
| 135 |
-
---
|
| 136 |
-
|
| 137 |
-
## 4. skill: `help` — via `call_agent`
|
| 138 |
-
|
| 139 |
-
**There is no `/help` endpoint.** The FE "/" menu surfaces `/help`; to invoke it, call
|
| 140 |
-
`POST /api/v1/chat/stream` and let the router classify the message as the `help` intent
|
| 141 |
-
([chat_handler.py:363](src/agents/chat_handler.py:363)). Help streams `chunk` events (same SSE
|
| 142 |
-
shape as §2, with `sources: []` and no `status` pings) — a state-aware, next-step guidance reply.
|
| 143 |
-
|
| 144 |
-
```
|
| 145 |
-
event: sources
|
| 146 |
-
data: []
|
| 147 |
-
|
| 148 |
-
event: chunk
|
| 149 |
-
data: Your goal is set — you can start exploring now. Try a question like "average order value by month", then I can generate a report.
|
| 150 |
-
|
| 151 |
-
event: done
|
| 152 |
-
data:
|
| 153 |
-
```
|
| 154 |
-
|
| 155 |
-
> **Open integration question (for Harry):** the Python `/chat/stream` contract has **no
|
| 156 |
-
> forced-intent / slash-bypass param** — `handle()` always routes via the LLM classifier. So
|
| 157 |
-
> deterministic `/help` dispatch depends on either (a) Go forwarding the literal slash text and
|
| 158 |
-
> trusting the router to classify it as `help`, or (b) adding a forced-intent input to the chat
|
| 159 |
-
> contract. The `tools.py` docstring's "slash invocation bypasses the router to the tool directly"
|
| 160 |
-
> is **not yet true on the Python side.** Needs a decision. (DEV_PLAN #8/#18.)
|
| 161 |
-
|
| 162 |
-
---
|
| 163 |
-
|
| 164 |
-
## 5. skill: `report` — Report API
|
| 165 |
-
|
| 166 |
-
Dedicated REST surface (the "Generate Report" button), **not** a chat route.
|
| 167 |
-
Source: [report.py](src/api/v1/report.py).
|
| 168 |
-
|
| 169 |
-
### `POST /api/v1/report`
|
| 170 |
-
Generate, persist, and return a new report **version**.
|
| 171 |
-
|
| 172 |
-
**Query params:** `analysis_id` (required), `user_id` (required). No request body.
|
| 173 |
-
|
| 174 |
-
```
|
| 175 |
-
POST /api/v1/report?analysis_id=room_42&user_id=u_1a2b3c
|
| 176 |
-
```
|
| 177 |
-
|
| 178 |
-
| Status | Meaning |
|
| 179 |
-
|---|---|
|
| 180 |
-
| `201` | New version generated → `AnalysisReport` body. |
|
| 181 |
-
| `409` | Floor not met — **no recorded analyses yet** for this session, nothing to report. |
|
| 182 |
-
| `500` | Generation or persistence failed. |
|
| 183 |
-
|
| 184 |
-
**`201` response** (`AnalysisReport`):
|
| 185 |
-
|
| 186 |
-
```json
|
| 187 |
-
{
|
| 188 |
-
"report_id": "8f3a2b1c9d4e4f6a8b0c1d2e3f4a5b6c",
|
| 189 |
-
"analysis_id": "room_42",
|
| 190 |
-
"user_id": "u_1a2b3c",
|
| 191 |
-
"version": 2,
|
| 192 |
-
"generated_at": "2026-06-25T09:14:33.512Z",
|
| 193 |
-
"problem_statement": {
|
| 194 |
-
"objective": "Understand which regions drive revenue and why Q1 dipped.",
|
| 195 |
-
"business_questions": [
|
| 196 |
-
"Which regions contribute most to total revenue?",
|
| 197 |
-
"Did any region decline quarter-over-quarter?"
|
| 198 |
-
]
|
| 199 |
-
},
|
| 200 |
-
"record_ids": ["rec_a1", "rec_b2"],
|
| 201 |
-
"executive_summary": "Revenue is concentrated in the Central region (38% of total). The West was the only region to contract, down 12% QoQ — the main driver of the Q1 dip.",
|
| 202 |
-
"findings": [
|
| 203 |
-
{ "text": "Central region contributed 38% of total revenue, the largest share.",
|
| 204 |
-
"record_ids": ["rec_a1"], "supporting_data": null },
|
| 205 |
-
{ "text": "West region revenue fell 12% quarter-over-quarter.",
|
| 206 |
-
"record_ids": ["rec_b2"], "supporting_data": null }
|
| 207 |
-
],
|
| 208 |
-
"caveats": [
|
| 209 |
-
{ "text": "March data for the East region was partially missing (~6% of rows).",
|
| 210 |
-
"record_ids": ["rec_b2"] }
|
| 211 |
-
],
|
| 212 |
-
"open_questions": [
|
| 213 |
-
{ "text": "What drove the West region's QoQ decline?", "record_ids": ["rec_b2"] }
|
| 214 |
-
],
|
| 215 |
-
"data_sources": [
|
| 216 |
-
{ "source_id": "src_sales_db", "name": "orders", "source_type": "postgres",
|
| 217 |
-
"detail": { "tables": ["orders"], "row_count": 48213,
|
| 218 |
-
"columns": ["region", "amount", "ordered_at"] } }
|
| 219 |
-
],
|
| 220 |
-
"method_steps": [
|
| 221 |
-
{ "task_id": "t1", "stage": "data_understanding", "objective": "Inventory the sales source",
|
| 222 |
-
"status": "success", "tools_used": ["check_data"] },
|
| 223 |
-
{ "task_id": "t2", "stage": "modeling", "objective": "Aggregate revenue by region",
|
| 224 |
-
"status": "success", "tools_used": ["analyze_aggregate"] }
|
| 225 |
-
],
|
| 226 |
-
"rendered_markdown": "# Analysis Report\n\n*Generated 2026-06-25 by u_1a2b3c · 2 analyses · 1 source(s)*\n\n## Objective\nUnderstand which regions drive revenue…\n\n## Key Findings\n1. Central region contributed 38%…"
|
| 227 |
-
}
|
| 228 |
-
```
|
| 229 |
-
|
| 230 |
-
**`409` response** (floor not met — the demo's most common error):
|
| 231 |
-
|
| 232 |
-
```json
|
| 233 |
-
{ "detail": "Not ready to generate a report — still needs at least one completed analysis." }
|
| 234 |
-
```
|
| 235 |
-
|
| 236 |
-
> ⚠️ **Demo/integration precondition:** `AnalysisRecord`s persist **only on the slow path**, so
|
| 237 |
-
> reports require **`enable_slow_path=true`** on the Python deployment *and* ≥1 prior
|
| 238 |
-
> `structured_flow` question in the session. With slow path off, `POST /report` **409s by design**,
|
| 239 |
-
> not a bug. (DEV_PLAN #15/#16.)
|
| 240 |
-
|
| 241 |
-
### `GET /api/v1/report/{analysis_id}`
|
| 242 |
-
List a session's report versions (oldest-first). Returns `[ReportVersionEntry]`; `[]` if none.
|
| 243 |
-
|
| 244 |
-
```json
|
| 245 |
-
[
|
| 246 |
-
{ "report_id": "1b2c3d4e…", "version": 1, "generated_at": "2026-06-24T15:02:11Z", "record_count": 1 },
|
| 247 |
-
{ "report_id": "8f3a2b1c…", "version": 2, "generated_at": "2026-06-25T09:14:33Z", "record_count": 2 }
|
| 248 |
-
]
|
| 249 |
-
```
|
| 250 |
-
|
| 251 |
-
### `GET /api/v1/report/{analysis_id}/{version}`
|
| 252 |
-
Fetch one version → `AnalysisReport` (same shape as the `POST` 201 body above); `404` if that
|
| 253 |
-
version doesn't exist.
|
| 254 |
-
|
| 255 |
-
```json
|
| 256 |
-
{ "detail": "No report v3 for analysis 'room_42'." }
|
| 257 |
-
```
|
| 258 |
-
|
| 259 |
-
---
|
| 260 |
-
|
| 261 |
-
## 6. Schemas
|
| 262 |
-
|
| 263 |
-
**`AnalysisReport`** (POST + GET-version body):
|
| 264 |
-
|
| 265 |
-
| Field | Type | Notes |
|
| 266 |
-
|---|---|---|
|
| 267 |
-
| `report_id` | str | |
|
| 268 |
-
| `analysis_id` | str | == `room_id` |
|
| 269 |
-
| `user_id` | str \| null | |
|
| 270 |
-
| `version` | int | monotonic V1, V2, … |
|
| 271 |
-
| `generated_at` | datetime | ISO 8601, UTC |
|
| 272 |
-
| `problem_statement` | `{ objective: str, business_questions: string[] }` | the frozen goal snapshot (new pivot shape) |
|
| 273 |
-
| `record_ids` | string[] | records the version was built from |
|
| 274 |
-
| `executive_summary` | str | the **only** LLM-authored field |
|
| 275 |
-
| `findings` | `ReportFinding[]` | `{ text, record_ids[], supporting_data? }` |
|
| 276 |
-
| `caveats` | `AttributedNote[]` | `{ text, record_ids[] }` |
|
| 277 |
-
| `open_questions` | `AttributedNote[]` | `{ text, record_ids[] }` |
|
| 278 |
-
| `data_sources` | `DataSourceRef[]` | `{ source_id, name, source_type, detail }` |
|
| 279 |
-
| `method_steps` | `TaskSummary[]` | `{ task_id, stage, objective, status, tools_used[] }`; `stage` ∈ CRISP-DM phases |
|
| 280 |
-
| `rendered_markdown` | str | the full rendered report |
|
| 281 |
-
|
| 282 |
-
> **Persistence caveat:** dedorch `reports` stores **markdown only**. On read-back via the `GET`
|
| 283 |
-
> endpoints, the structured fields above come back **empty** and `rendered_markdown` is the source of
|
| 284 |
-
> truth. (REPO_STATUS §5.)
|
| 285 |
-
|
| 286 |
-
**`ReportVersionEntry`** (GET-list rows): `{ report_id, version, generated_at, record_count }`.
|
| 287 |
-
|
| 288 |
-
---
|
| 289 |
-
|
| 290 |
-
## 7. Not FE-facing
|
| 291 |
-
|
| 292 |
-
Registered under `/api/v1` but **not** part of the FE→Python surface — do not wire these from the FE:
|
| 293 |
-
|
| 294 |
-
- **Analysis CRUD** — `POST /analysis/create`, `GET /analysis`, `GET /analysis/{id}`. Intended to
|
| 295 |
-
move behind Go (state writes via Go, per decision #5/#18). Router still **mounted** (Go may use it);
|
| 296 |
-
the FE should not call it.
|
| 297 |
-
- **`check_data` / `check_knowledge`** — served by **Go**, not surfaced as Python FE endpoints.
|
| 298 |
-
- **Chat cache management** — `DELETE /chat/cache`, `/chat/cache/room/{id}`, `/retrieval/cache/{user_id}`
|
| 299 |
-
(ops/internal).
|
| 300 |
-
- **Phase-1 legacy routers** — `users`, `room`, `document`, `db_client`, `data_catalog`
|
| 301 |
-
(functionally migrated to Go; mostly dormant).
|
| 302 |
-
- **Health/root** — `GET /`, `GET /health` (liveness only).
|
| 303 |
-
|
| 304 |
-
---
|
| 305 |
-
|
| 306 |
-
## 8. Open items affecting this contract
|
| 307 |
-
|
| 308 |
-
1. **`/help` dispatch mechanism** — router-classify vs. forced-intent param (§4). *(DEV_PLAN #8/#18)*
|
| 309 |
-
2. **`/report` needs `enable_slow_path=true`** + a prior `structured_flow` question, else 409.
|
| 310 |
-
*(DEV_PLAN #15)*
|
| 311 |
-
3. **`analysis_records` home** post-`SKIP_INIT_DB` cutover — the report API depends on this table
|
| 312 |
-
existing. *(DEV_PLAN #14/#16)*
|
| 313 |
-
4. **Analysis-state writes** — once Go owns creation + state writes, Python's per-turn state
|
| 314 |
-
`ensure` becomes a read-only get (Go must guarantee the row exists before any chat turn).
|
| 315 |
-
*(DEV_PLAN #18)*
|
| 316 |
-
|
| 317 |
-
---
|
| 318 |
-
|
| 319 |
-
## 9. Appendix — complete endpoint inventory (all registered routes)
|
| 320 |
-
|
| 321 |
-
Every route mounted in [main.py](main.py), so task #8 can be decided against the full picture.
|
| 322 |
-
**32 routes** in total — 30 across 9 routers plus 2 app-level. Status legend:
|
| 323 |
-
**✅ FE-callable** (one of the 4 surfaces — keep) · **✂️ comment out** (task #8 target) ·
|
| 324 |
-
**🟦 legacy → Go** (Phase-1, functionally migrated; not FE→Python; mostly dormant) ·
|
| 325 |
-
**⚙️ internal/ops**.
|
| 326 |
-
|
| 327 |
-
| Method | Path | Purpose | Router | Status |
|
| 328 |
-
|---|---|---|---|---|
|
| 329 |
-
| POST | `/api/v1/chat/stream` | Main chat SSE — **`call_agent`**; carries chat/help/check/structured/unstructured intents | Chat | ✅ FE-callable (#1, +help #3) |
|
| 330 |
-
| GET | `/api/v1/tools` | Slash-command catalog — **`list_skills`** (Go caches) | Tools | ✅ FE-callable (#2) |
|
| 331 |
-
| POST | `/api/v1/report` | Generate a report version | Report | ✅ FE-callable (#4) |
|
| 332 |
-
| GET | `/api/v1/report/{analysis_id}` | List report versions | Report | ✅ FE-callable (#4) |
|
| 333 |
-
| GET | `/api/v1/report/{analysis_id}/{version}` | Fetch one report version | Report | ✅ FE-callable (#4) |
|
| 334 |
-
| POST | `/api/v1/analysis/create` | Create session (state + room + bindings) | Analysis | ✂️ comment (#8 → Go) |
|
| 335 |
-
| GET | `/api/v1/analysis` | List a user's analyses | Analysis | ✂️ comment (#8) |
|
| 336 |
-
| GET | `/api/v1/analysis/{analysis_id}` | Get one session's state + sources | Analysis | ✂️ comment (#8) |
|
| 337 |
-
| DELETE | `/api/v1/chat/cache` | Clear one cached reply | Chat | ⚙️ internal/ops |
|
| 338 |
-
| DELETE | `/api/v1/chat/cache/room/{room_id}` | Clear a room's cache | Chat | ⚙️ internal/ops |
|
| 339 |
-
| DELETE | `/api/v1/retrieval/cache/{user_id}` | Clear a user's retrieval cache | Chat | ⚙️ internal/ops |
|
| 340 |
-
| GET | `/` | Service status | (app) | ⚙️ internal/ops |
|
| 341 |
-
| GET | `/health` | Liveness probe | (app) | ⚙️ internal/ops |
|
| 342 |
-
| POST | `/api/login` | Login by email + password ⚠️ mounted at `/api`, **not** `/api/v1` | Users | 🟦 legacy → Go |
|
| 343 |
-
| GET | `/api/v1/documents/doctypes` | Supported document types | Documents | 🟦 legacy → Go |
|
| 344 |
-
| GET | `/api/v1/documents/{user_id}` | List a user's documents | Documents | 🟦 legacy → Go |
|
| 345 |
-
| POST | `/api/v1/document/upload` | Upload a document (10/min) | Documents | 🟦 legacy → Go |
|
| 346 |
-
| DELETE | `/api/v1/document/delete` | Delete a document | Documents | 🟦 legacy → Go |
|
| 347 |
-
| POST | `/api/v1/document/process` | Process / ingest a document | Documents | 🟦 legacy → Go |
|
| 348 |
-
| GET | `/api/v1/rooms/{user_id}` | List a user's rooms | Rooms | 🟦 legacy → Go |
|
| 349 |
-
| GET | `/api/v1/room/{room_id}` | Get one room | Rooms | 🟦 legacy → Go |
|
| 350 |
-
| DELETE | `/api/v1/room/{room_id}` | Delete a room | Rooms | 🟦 legacy → Go |
|
| 351 |
-
| POST | `/api/v1/room/create` | Create a room | Rooms | 🟦 legacy → Go |
|
| 352 |
-
| GET | `/api/v1/data-catalog/{user_id}` | List catalog index | Data Catalog | 🟦 legacy → Go |
|
| 353 |
-
| POST | `/api/v1/data-catalog/rebuild` | Rebuild a user's catalog | Data Catalog | 🟦 legacy → Go |
|
| 354 |
-
| GET | `/api/v1/database-clients/dbtypes` | Supported DB types | Database Clients | 🟦 legacy → Go |
|
| 355 |
-
| POST | `/api/v1/database-clients` | Create a DB connection | Database Clients | 🟦 legacy → Go |
|
| 356 |
-
| GET | `/api/v1/database-clients/{user_id}` | List a user's DB connections | Database Clients | 🟦 legacy → Go |
|
| 357 |
-
| GET | `/api/v1/database-clients/{user_id}/{client_id}` | Get one DB connection | Database Clients | 🟦 legacy → Go |
|
| 358 |
-
| PUT | `/api/v1/database-clients/{client_id}` | Update a DB connection | Database Clients | 🟦 legacy → Go |
|
| 359 |
-
| DELETE | `/api/v1/database-clients/{client_id}` | Delete a DB connection | Database Clients | 🟦 legacy → Go |
|
| 360 |
-
| POST | `/api/v1/database-clients/{client_id}/ingest` | Build the catalog for a DB connection | Database Clients | 🟦 legacy → Go |
|
| 361 |
-
|
| 362 |
-
**Tally:** 5 ✅ FE-callable · 3 ✂️ to comment (#8) · 19 🟦 legacy→Go · 5 ⚙️ internal/ops.
|
| 363 |
-
|
| 364 |
-
**Task #8 reading:**
|
| 365 |
-
- **Keep exposed:** the 5 ✅ rows (`chat/stream`, `/tools`, the 3 `report` routes). `help` rides on
|
| 366 |
-
`chat/stream` — no route of its own.
|
| 367 |
-
- **Comment out (the #8 to-do):** the 3 `analysis` routes — analysis CRUD moves behind Go (#5/#18).
|
| 368 |
-
- **`check_data` is not an HTTP endpoint** — it's the `check` router intent (runs inside
|
| 369 |
-
`chat/stream`) plus its now-commented slash-catalog entry (KM-678); Go serves it to the FE. So
|
| 370 |
-
"comment check_data" = the catalog line (done) + don't expose a Python route (there isn't one).
|
| 371 |
-
- The 19 🟦 routes (in the `users`, `document`, `room`, `data_catalog`, `db_client` routers) are Phase-1 legacy,
|
| 372 |
-
already functionally in Go (REPO_STATUS §7). They're out of the FE→Python path but **still
|
| 373 |
-
mounted** — a separate cleanup from #8's analysis-CRUD scope.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
API_ENDPOINTS_RESTRUCTURE.md
ADDED
|
@@ -0,0 +1,391 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Backend Agentic Service — API Endpoint Docs (endpoint restructure)
|
| 2 |
+
|
| 3 |
+
**Status:** contract draft for FE/Go integration (2026-06-30). Covers the AI-only surface after the
|
| 4 |
+
restructure. Sections marked **TENTATIVE** (observability) may still change — send feedback before we
|
| 5 |
+
lock them.
|
| 6 |
+
|
| 7 |
+
**What changed**
|
| 8 |
+
- **Only the chat pilot moves to `/api/v2`.** Everything else stays on `/api/v1`, with the skills regrouped under `/tools`.
|
| 9 |
+
- **Chat pilot (`/api/v2/chat/stream`) uses `analysis_id`, not `room_id`.**
|
| 10 |
+
- **Skills are grouped under `/api/v1/tools`:** `list` / `help` / `report`.
|
| 11 |
+
- **New:** `GET /api/v1/observability` — Responsible-AI provenance per chat answer.
|
| 12 |
+
- Python is **generative-AI only.** It never creates/updates an analysis, room, document, DB
|
| 13 |
+
client, or catalog — Go owns those. Python just receives `analysis_id`. Those v1 routers are
|
| 14 |
+
unwired from `main` + Swagger (not deleted).
|
| 15 |
+
|
| 16 |
+
**Open coordination questions (need a decision with Harry) — flagged inline as ⚠️:**
|
| 17 |
+
1. **`message_id` origin** — who mints the assistant turn id used to correlate stream ↔ observability? (Recommend: Go mints it, passes it in the chat request, Python echoes on `done`.)
|
| 18 |
+
2. **Deterministic `/help` dispatch** — dedicated endpoint (recommended below) vs router classification.
|
| 19 |
+
3. **Observability storage** — single JSONB row per message (recommended) vs 3 normalized tables.
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## 1. call_agent — `POST /api/v2/chat/stream`
|
| 24 |
+
|
| 25 |
+
The only FE→Python call in normal operation. Same as v1 except **`room_id` → `analysis_id`**, and
|
| 26 |
+
the `done` event now carries the assistant `message_id` for observability correlation.
|
| 27 |
+
|
| 28 |
+
**Request body** (`application/json`) — `ChatRequest`:
|
| 29 |
+
|
| 30 |
+
```json
|
| 31 |
+
{
|
| 32 |
+
"user_id": "u_1a2b3c",
|
| 33 |
+
"analysis_id": "an_42",
|
| 34 |
+
"message_id": "msg_88f1",
|
| 35 |
+
"message": "What were total sales by region last quarter?"
|
| 36 |
+
}
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
- `analysis_id` is the analysis-session id (replaces `room_id`). No auth header (handled by Go).
|
| 40 |
+
- ⚠️ `message_id` (optional): the assistant turn id. **Recommended: Go mints it** alongside the
|
| 41 |
+
`analyses_messages` row and passes it here, so the FE can call `/api/v1/observability?message_id=...`
|
| 42 |
+
in parallel. If omitted, Python mints one and returns it on `done`.
|
| 43 |
+
|
| 44 |
+
**Response:** `text/event-stream`. Events arrive in this order:
|
| 45 |
+
|
| 46 |
+
| event | data | notes |
|
| 47 |
+
|---|---|---|
|
| 48 |
+
| `sources` | JSON array of `{document_id, filename, page_label}` | structured: one per executed table; unstructured: deduped doc/page; chat/help/error: `[]`. |
|
| 49 |
+
| `status` | text | slow-path only — progress pings ("Planning…", "Running N steps…"). Safe to surface or ignore. |
|
| 50 |
+
| `chunk` | text fragment | concatenate in order to form the answer. |
|
| 51 |
+
| `done` | `{"message_id": "..."}` | **v2 change:** was empty; now returns the turn id for the observability lookup. |
|
| 52 |
+
| `error` | text | terminal error; stream stops after this. |
|
| 53 |
+
|
| 54 |
+
The internal `intent` event is consumed inside Python (gates caching) and **not** forwarded.
|
| 55 |
+
|
| 56 |
+
**Stream carries the answer text ONLY.** Planning / tool calls / sources detail are **not** in the
|
| 57 |
+
stream (it would slow it down) — fetch them from `/observability` (§7), called in parallel.
|
| 58 |
+
|
| 59 |
+
**Example — `structured_flow` answer** (raw SSE; blank line separates events):
|
| 60 |
+
|
| 61 |
+
```
|
| 62 |
+
event: sources
|
| 63 |
+
data: [{"document_id":"u_1a2b3c_orders","filename":"orders","page_label":null}]
|
| 64 |
+
|
| 65 |
+
event: status
|
| 66 |
+
data: Planning analysis…
|
| 67 |
+
|
| 68 |
+
event: status
|
| 69 |
+
data: Running 3 steps…
|
| 70 |
+
|
| 71 |
+
event: chunk
|
| 72 |
+
data: Total sales by region last quarter:
|
| 73 |
+
|
| 74 |
+
event: chunk
|
| 75 |
+
data: Central led at $1.21M (38%), East $0.74M, West $0.55M (down 12% QoQ).
|
| 76 |
+
|
| 77 |
+
event: done
|
| 78 |
+
data: {"message_id":"msg_88f1"}
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
**Example — simple chat reply** (no status pings, empty sources):
|
| 82 |
+
|
| 83 |
+
```
|
| 84 |
+
event: sources
|
| 85 |
+
data: []
|
| 86 |
+
|
| 87 |
+
event: chunk
|
| 88 |
+
data: I'm your AI data analyst — connect a source or ask a question to get started.
|
| 89 |
+
|
| 90 |
+
event: done
|
| 91 |
+
data: {"message_id":"msg_12"}
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
Behavior unchanged from v1: 1h Redis response-cache on the stateless `chat` intent only;
|
| 95 |
+
greeting/farewell fast-path (canned, no LLM); LLM router classifies every message into one of 5
|
| 96 |
+
intents (`chat · help · check · unstructured_flow · structured_flow`); messages persist on `done`.
|
| 97 |
+
|
| 98 |
+
---
|
| 99 |
+
|
| 100 |
+
## 2. list_skills — `GET /api/v1/tools/list`
|
| 101 |
+
|
| 102 |
+
Static, deterministic, safe for Go to cache. (Was `GET /api/v1/tools`.)
|
| 103 |
+
|
| 104 |
+
**Request:** none.
|
| 105 |
+
|
| 106 |
+
**Response 200** (`ListToolsResponse`):
|
| 107 |
+
|
| 108 |
+
```json
|
| 109 |
+
{
|
| 110 |
+
"count": 2,
|
| 111 |
+
"tools": [
|
| 112 |
+
{ "command": "/help", "name": "help", "type": "skill",
|
| 113 |
+
"description": "Show what the assistant can do and guide your next step." },
|
| 114 |
+
{ "command": "/report", "name": "report", "type": "skill",
|
| 115 |
+
"description": "Generate a versioned analysis report (background, EDA, key findings, insights)." }
|
| 116 |
+
]
|
| 117 |
+
}
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
`CommandResponse = { command, name, type, description }`, `type ∈ {skill, analytics, data_access}`.
|
| 121 |
+
Catalog is `/help` + `/report` only; the `analyze_*` / `check_*` / `retrieve_*` and retired
|
| 122 |
+
`/problem-statement` entries are commented out (kept for restorability), not deleted.
|
| 123 |
+
|
| 124 |
+
**FE behavior:** the `/` slash menu surfaces **`/help` only**. **Report is a right-side button, not
|
| 125 |
+
a slash command** (it fires only when an analysis is finished — saves tokens).
|
| 126 |
+
|
| 127 |
+
---
|
| 128 |
+
|
| 129 |
+
## 3. skill: help — `POST /api/v1/tools/help`
|
| 130 |
+
|
| 131 |
+
⚠️ **Proposed dedicated endpoint** (new in v2). In v1 there was no `/help` endpoint — help was reached
|
| 132 |
+
only by letting the LLM router classify a chat message. A dedicated endpoint makes `/help` dispatch
|
| 133 |
+
**deterministic** (no risk the router mis-classifies the slash command) and gives it a clean home in
|
| 134 |
+
the tools group. State-aware: reads analysis state + history to guide the next step.
|
| 135 |
+
|
| 136 |
+
> Alternative if we *don't* add this endpoint: FE keeps calling `POST /chat/stream` and trusts the
|
| 137 |
+
> router to classify the help intent. We recommend the dedicated endpoint — decision pending (open
|
| 138 |
+
> question #2).
|
| 139 |
+
|
| 140 |
+
**Request body** (`application/json`):
|
| 141 |
+
|
| 142 |
+
```json
|
| 143 |
+
{
|
| 144 |
+
"user_id": "u_1a2b3c",
|
| 145 |
+
"analysis_id": "an_42"
|
| 146 |
+
}
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
**Response:** `text/event-stream` — same SSE shape as chat, with `sources: []` and no `status`
|
| 150 |
+
pings (help never references documents). Streams a next-step guidance reply.
|
| 151 |
+
|
| 152 |
+
```
|
| 153 |
+
event: sources
|
| 154 |
+
data: []
|
| 155 |
+
|
| 156 |
+
event: chunk
|
| 157 |
+
data: Your goal is set — you can start exploring now. Try a question like "average order value by month", then I can generate a report.
|
| 158 |
+
|
| 159 |
+
event: done
|
| 160 |
+
data: {"message_id":"msg_h7"}
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
---
|
| 164 |
+
|
| 165 |
+
## 4. skill: report — `POST /api/v1/tools/report`
|
| 166 |
+
|
| 167 |
+
The "Generate Report" button. Same as v1, moved under `/tools`. Generate, persist, and return a new
|
| 168 |
+
report version. Currently renders **Markdown** (FE preview); PPT/PDF/infographic export is future work
|
| 169 |
+
(triggered on a download button, not here).
|
| 170 |
+
|
| 171 |
+
**Query params:** `analysis_id` (required), `user_id` (required). No request body.
|
| 172 |
+
|
| 173 |
+
```
|
| 174 |
+
POST /api/v1/tools/report?analysis_id=an_42&user_id=u_1a2b3c
|
| 175 |
+
```
|
| 176 |
+
|
| 177 |
+
| status | meaning |
|
| 178 |
+
|---|---|
|
| 179 |
+
| 201 | new version generated → `AnalysisReport` body. |
|
| 180 |
+
| 409 | floor not met — no recorded analyses yet for this session, nothing to report. |
|
| 181 |
+
| 500 | generation or persistence failed. |
|
| 182 |
+
|
| 183 |
+
**201 response** (`AnalysisReport`):
|
| 184 |
+
|
| 185 |
+
```json
|
| 186 |
+
{
|
| 187 |
+
"report_id": "8f3a2b1c9d4e4f6a8b0c1d2e3f4a5b6c",
|
| 188 |
+
"analysis_id": "an_42",
|
| 189 |
+
"user_id": "u_1a2b3c",
|
| 190 |
+
"version": 2,
|
| 191 |
+
"generated_at": "2026-06-30T09:14:33.512Z",
|
| 192 |
+
"problem_statement": {
|
| 193 |
+
"objective": "Understand which regions drive revenue and why Q1 dipped.",
|
| 194 |
+
"business_questions": [
|
| 195 |
+
"Which regions contribute most to total revenue?",
|
| 196 |
+
"Did any region decline quarter-over-quarter?"
|
| 197 |
+
]
|
| 198 |
+
},
|
| 199 |
+
"record_ids": ["rec_a1", "rec_b2"],
|
| 200 |
+
"executive_summary": "Revenue is concentrated in the Central region (38% of total). The West was the only region to contract, down 12% QoQ — the main driver of the Q1 dip.",
|
| 201 |
+
"findings": [
|
| 202 |
+
{ "text": "Central region contributed 38% of total revenue, the largest share.",
|
| 203 |
+
"record_ids": ["rec_a1"], "supporting_data": null },
|
| 204 |
+
{ "text": "West region revenue fell 12% quarter-over-quarter.",
|
| 205 |
+
"record_ids": ["rec_b2"], "supporting_data": null }
|
| 206 |
+
],
|
| 207 |
+
"caveats": [
|
| 208 |
+
{ "text": "March data for the East region was partially missing (~6% of rows).",
|
| 209 |
+
"record_ids": ["rec_b2"] }
|
| 210 |
+
],
|
| 211 |
+
"open_questions": [
|
| 212 |
+
{ "text": "What drove the West region's QoQ decline?", "record_ids": ["rec_b2"] }
|
| 213 |
+
],
|
| 214 |
+
"data_sources": [
|
| 215 |
+
{ "source_id": "src_sales_db", "name": "orders", "source_type": "postgres",
|
| 216 |
+
"detail": { "tables": ["orders"], "row_count": 48213,
|
| 217 |
+
"columns": ["region", "amount", "ordered_at"] } }
|
| 218 |
+
],
|
| 219 |
+
"method_steps": [
|
| 220 |
+
{ "task_id": "t1", "stage": "data_understanding", "objective": "Inventory the sales source",
|
| 221 |
+
"status": "success", "tools_used": ["check_data"] },
|
| 222 |
+
{ "task_id": "t2", "stage": "modeling", "objective": "Aggregate revenue by region",
|
| 223 |
+
"status": "success", "tools_used": ["analyze_aggregate"] }
|
| 224 |
+
],
|
| 225 |
+
"rendered_markdown": "# Analysis Report\n\n*Generated 2026-06-30 by u_1a2b3c · 2 analyses · 1 source(s)*\n\n## Objective\nUnderstand which regions drive revenue…\n\n## Key Findings\n1. Central region contributed 38%…"
|
| 226 |
+
}
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
**409 response** (floor not met — the demo's most common error):
|
| 230 |
+
|
| 231 |
+
```json
|
| 232 |
+
{ "detail": "Not ready to generate a report — still needs at least one completed analysis." }
|
| 233 |
+
```
|
| 234 |
+
|
| 235 |
+
⚠️ **Precondition:** `AnalysisRecord`s persist only on the slow path, so reports require
|
| 236 |
+
`ENABLE_SLOW_PATH=true` on the Python deployment and ≥1 prior `structured_flow` question in the
|
| 237 |
+
session. With slow path off, `POST` 409s by design.
|
| 238 |
+
|
| 239 |
+
---
|
| 240 |
+
|
| 241 |
+
## 5. report versions — `GET /api/v1/tools/report/{analysis_id}` and `/{analysis_id}/{version}`
|
| 242 |
+
|
| 243 |
+
List a session's report versions (oldest-first). Returns `[ReportVersionEntry]`; `[]` if none.
|
| 244 |
+
|
| 245 |
+
```json
|
| 246 |
+
[
|
| 247 |
+
{ "report_id": "1b2c3d4e…", "version": 1, "generated_at": "2026-06-24T15:02:11Z", "record_count": 1 },
|
| 248 |
+
{ "report_id": "8f3a2b1c…", "version": 2, "generated_at": "2026-06-30T09:14:33Z", "record_count": 2 }
|
| 249 |
+
]
|
| 250 |
+
```
|
| 251 |
+
|
| 252 |
+
`GET /api/v1/tools/report/{analysis_id}/{version}` → one `AnalysisReport` (same shape as the POST
|
| 253 |
+
201 body); 404 if that version doesn't exist:
|
| 254 |
+
|
| 255 |
+
```json
|
| 256 |
+
{ "detail": "No report v3 for analysis 'an_42'." }
|
| 257 |
+
```
|
| 258 |
+
|
| 259 |
+
---
|
| 260 |
+
|
| 261 |
+
## 6. Unwired in v2 (mounted in v1, OFF in v2)
|
| 262 |
+
|
| 263 |
+
Commented out of `main` + Swagger, **files kept**. Go owns these; Python is generative-only:
|
| 264 |
+
`POST /analysis/create` + analysis CRUD · `room` · `db_client` · `document` · `data_catalog` ·
|
| 265 |
+
`users`/login. Re-mounting is a one-line `include_router` if ever needed.
|
| 266 |
+
|
| 267 |
+
---
|
| 268 |
+
|
| 269 |
+
## 7. observability — `GET /api/v1/observability` **(NEW · TENTATIVE)**
|
| 270 |
+
|
| 271 |
+
Responsible-AI provenance for **one chat answer**. Separate endpoint, called **in parallel with the
|
| 272 |
+
stream** — never embedded in it. The FE renders it as a collapsed dropdown the user can expand
|
| 273 |
+
(planning / tool calls / sources), Claude/Codex-style.
|
| 274 |
+
|
| 275 |
+
**Design (recommended):** one endpoint returns one merged object, backed by **one JSONB row per
|
| 276 |
+
message** written by an accumulating "scratchpad" decorator inside the chat agent and flushed on
|
| 277 |
+
`done`. The 3 facets (`planning` / `tool_calls` / `sources`) are **logical sections of the JSON**,
|
| 278 |
+
not separate tables — so the shape can evolve without a dedorch migration each time. (Storage is open
|
| 279 |
+
question #3.)
|
| 280 |
+
|
| 281 |
+
**Query params:** `analysis_id` (required), `message_id` (required).
|
| 282 |
+
|
| 283 |
+
```
|
| 284 |
+
GET /api/v1/observability?analysis_id=an_42&message_id=msg_88f1
|
| 285 |
+
```
|
| 286 |
+
|
| 287 |
+
**Timing:** the row is written when the turn finishes, so call this **after** the stream's `done`
|
| 288 |
+
event (or poll until 200). "Parallel" = a separate call the FE fires alongside the stream, not data
|
| 289 |
+
embedded in the stream.
|
| 290 |
+
|
| 291 |
+
**Field rules (by intent):**
|
| 292 |
+
- `planning` — present **only when the planner ran** (slow path); `null` otherwise.
|
| 293 |
+
- `tool_calls` — every tool invoked, with input + output. `[]` for pure chat / greeting / help.
|
| 294 |
+
- `sources` — **required for retrieve flows** (`structured_flow`, `unstructured_flow`). **Empty for
|
| 295 |
+
greeting / `chat` / `help`** (they don't reference documents).
|
| 296 |
+
- `thinking` — optional reasoning text; `null` if none.
|
| 297 |
+
|
| 298 |
+
**200 response — full `structured_flow` turn** (planner ran → all sections present):
|
| 299 |
+
|
| 300 |
+
```json
|
| 301 |
+
{
|
| 302 |
+
"analysis_id": "an_42",
|
| 303 |
+
"message_id": "msg_88f1",
|
| 304 |
+
"intent": "structured_flow",
|
| 305 |
+
"generated_at": "2026-06-30T03:21:09.114Z",
|
| 306 |
+
"planning": {
|
| 307 |
+
"goal_restated": "Find which regions drive revenue and why Q1 dipped.",
|
| 308 |
+
"assumptions": ["'last quarter' = Q1 2026"],
|
| 309 |
+
"steps": [
|
| 310 |
+
{ "step": 1, "stage": "data_understanding", "objective": "Inventory the sales source" },
|
| 311 |
+
{ "step": 2, "stage": "modeling", "objective": "Aggregate revenue by region" }
|
| 312 |
+
]
|
| 313 |
+
},
|
| 314 |
+
"thinking": "The question needs a per-region breakdown plus a cause, so I inventory the source, aggregate revenue by region, then compare quarters.",
|
| 315 |
+
"tool_calls": [
|
| 316 |
+
{
|
| 317 |
+
"order": 1,
|
| 318 |
+
"name": "check_data",
|
| 319 |
+
"input": { "source_hint": "structured" },
|
| 320 |
+
"output": { "kind": "table", "summary": "1 source · 1 table (orders) · 48,213 rows" },
|
| 321 |
+
"status": "success"
|
| 322 |
+
},
|
| 323 |
+
{
|
| 324 |
+
"order": 2,
|
| 325 |
+
"name": "retrieve_data",
|
| 326 |
+
"input": { "source_id": "src_sales_db", "table_id": "orders",
|
| 327 |
+
"select": ["region", "amount"], "group_by": ["region"] },
|
| 328 |
+
"output": { "kind": "table", "columns": ["region", "total"], "row_count": 4,
|
| 329 |
+
"preview": [["Central", 1210000], ["East", 740000]] },
|
| 330 |
+
"status": "success"
|
| 331 |
+
}
|
| 332 |
+
],
|
| 333 |
+
"sources": [
|
| 334 |
+
{
|
| 335 |
+
"type": "database",
|
| 336 |
+
"source_id": "src_sales_db",
|
| 337 |
+
"name": "orders",
|
| 338 |
+
"query": "SELECT region, SUM(amount) AS total FROM orders GROUP BY region",
|
| 339 |
+
"detail": { "tables": ["orders"], "row_count": 48213 }
|
| 340 |
+
}
|
| 341 |
+
]
|
| 342 |
+
}
|
| 343 |
+
```
|
| 344 |
+
|
| 345 |
+
**200 response — `unstructured_flow` turn** (no planner; source = document, with the retrieval query):
|
| 346 |
+
|
| 347 |
+
```json
|
| 348 |
+
{
|
| 349 |
+
"analysis_id": "an_42",
|
| 350 |
+
"message_id": "msg_55",
|
| 351 |
+
"intent": "unstructured_flow",
|
| 352 |
+
"generated_at": "2026-06-30T03:40:02.001Z",
|
| 353 |
+
"planning": null,
|
| 354 |
+
"thinking": null,
|
| 355 |
+
"tool_calls": [
|
| 356 |
+
{ "order": 1, "name": "retrieve_knowledge",
|
| 357 |
+
"input": { "query": "technology stack used in this project", "top_k": 4 },
|
| 358 |
+
"output": { "kind": "documents", "row_count": 4 }, "status": "success" }
|
| 359 |
+
],
|
| 360 |
+
"sources": [
|
| 361 |
+
{ "type": "document", "document_id": "doc_7", "filename": "tech_handbook.pdf",
|
| 362 |
+
"page_label": "12", "query": "technology stack used in this project",
|
| 363 |
+
"snippet": "The backend is built on FastAPI with async SQLAlchemy…", "score": 0.83 }
|
| 364 |
+
]
|
| 365 |
+
}
|
| 366 |
+
```
|
| 367 |
+
|
| 368 |
+
**200 response — simple `chat` / greeting turn** (nothing to trace):
|
| 369 |
+
|
| 370 |
+
```json
|
| 371 |
+
{
|
| 372 |
+
"analysis_id": "an_42",
|
| 373 |
+
"message_id": "msg_12",
|
| 374 |
+
"intent": "chat",
|
| 375 |
+
"generated_at": "2026-06-30T03:05:00.000Z",
|
| 376 |
+
"planning": null,
|
| 377 |
+
"thinking": null,
|
| 378 |
+
"tool_calls": [],
|
| 379 |
+
"sources": []
|
| 380 |
+
}
|
| 381 |
+
```
|
| 382 |
+
|
| 383 |
+
**404** — no provenance for that message yet (turn still running or unknown id):
|
| 384 |
+
|
| 385 |
+
```json
|
| 386 |
+
{ "detail": "No observability for message 'msg_88f1' yet." }
|
| 387 |
+
```
|
| 388 |
+
|
| 389 |
+
> ⚠️ **Richness is path-dependent.** Full `planning` + tool I/O exist only when
|
| 390 |
+
> `ENABLE_SLOW_PATH=true`. Pure chat / greeting / help record empty `tool_calls` + `sources`; fast single-query retrieve turns still record `sources` + the single
|
| 391 |
+
> tool call but have `planning: null`. This matches the rule "planning only when the planner runs."
|
DEV_PLAN.md
CHANGED
|
@@ -1,10 +1,47 @@
|
|
| 1 |
-
# Data Eyond — Current Development Plan (post 2026-06-24
|
| 2 |
|
| 3 |
**Purpose:** context file for Claude Code sessions working on the current sprint.
|
| 4 |
-
**Branch:** `pr/
|
| 5 |
**Companion:** [REPO_STATUS.md](REPO_STATUS.md) describes the repo's *current built state*; this file
|
| 6 |
-
describes the *in-flight plan* that changes it.
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
---
|
| 10 |
|
|
|
|
| 1 |
+
# Data Eyond — Current Development Plan (post 2026-06-24 → 2026-06-30 checkpoints)
|
| 2 |
|
| 3 |
**Purpose:** context file for Claude Code sessions working on the current sprint.
|
| 4 |
+
**Branch:** `pr/5` · **Snapshot:** 2026-06-30.
|
| 5 |
**Companion:** [REPO_STATUS.md](REPO_STATUS.md) describes the repo's *current built state*; this file
|
| 6 |
+
describes the *in-flight plan* that changes it. The **active sprint is pr/5** ([§0](#0-current-sprint--pr5-observability--endpoint-restructure)); sections §1–§6 are the prior
|
| 7 |
+
2026-06-24/25 pivot (now largely ✅), kept for context.
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## 0. Current sprint — pr/5: Observability + Endpoint Restructure
|
| 12 |
+
|
| 13 |
+
From the **2026-06-30 checkpoint**. Direction: **Python → generation/AI-only**; Go owns the analysis
|
| 14 |
+
lifecycle + data plane. Endpoint contract sent to Harry on 2026-06-30:
|
| 15 |
+
[API_ENDPOINTS_RESTRUCTURE.md](API_ENDPOINTS_RESTRUCTURE.md) (chat→v2, tools regroup, observability —
|
| 16 |
+
observability marked tentative). REPO_STATUS carries a matching `pr/5` direction banner.
|
| 17 |
+
|
| 18 |
+
Mentor's task order: **unwire → regroup endpoints → add tools (retrieve-data + observability)**; share
|
| 19 |
+
the endpoint contract *before* coding the tools. Status legend: ⬜ not started · 🔄 in progress · ✅ done ·
|
| 20 |
+
⛔ blocked · 🔎 verify · ⏸️ deferred.
|
| 21 |
+
|
| 22 |
+
| Phase | Task | Owner | Status | Notes |
|
| 23 |
+
|---|---|---|---|---|
|
| 24 |
+
| **P0 — contract** | Draft + send endpoint contract to Harry (chat v2 · tools group · observability) | Rifqi + Sofhia | ✅ | `API_ENDPOINTS_RESTRUCTURE.md` sent 2026-06-30 (before-noon deadline met). Observability section flagged tentative. |
|
| 25 |
+
| **1 — unwire** | Unwire `users`(login)/`document`/`room`/`db_client`/`data_catalog`/`analysis` from `main` + Swagger | Sofhia | ✅ | **KM-686**, commit `0b2d678`. Commented, not deleted; `chat`/`report`/`tools` kept mounted. Resolves the analysis-CRUD scope Q — whole `analysis` router unwired (Go owns it). |
|
| 26 |
+
| **2 — v2 + regroup** | Create `src/api/v2/` and move the chat pilot there | Rifqi | ✅ | New `src/api/v2/__init__.py` + `src/api/v2/chat.py` (`POST /api/v2/chat/stream`), mounted in `main.py`. Only chat in v2; v1 `/chat/stream` kept mounted until FE moves over. Routes import-verified. |
|
| 27 |
+
| **2 — v2 + regroup** | Chat: `room_id` → **`analysis_id`** (request field + handler + history) | Rifqi | ✅ | v2 `ChatRequest{user_id, analysis_id, message, message_id?}`; reuses warm `ChatHandler` + v1 cache/history helpers; `done` returns `{message_id}` (minted Python-side if Go omits, open-Q #1). Persistence kept transitionally → still ties to #25 (`analyses_messages`); ruff-clean. |
|
| 28 |
+
| **2 — v2 + regroup** | Move report under tools → `/api/v1/tools/report` (+ version routes) | Rifqi | ✅ | report router re-prefixed `/api/v1` → `/api/v1/tools` (all 3 routes move together), tag → `Tools`; old `/api/v1/report` gone. Same functionality, new home. Import-verified. |
|
| 29 |
+
| **2 — v2 + regroup** | Move help under tools → `POST /api/v1/tools/help` (dedicated endpoint) | Sofhia | ✅ | New `src/api/v1/help.py` (SSE: `sources:[]`→`chunk`→`done{message_id}`) + additive `ChatHandler.stream_help()` (reuses HelpAgent+state+readiness, no router). Generative-only (no persist). **Router `help` intent KEPT** — both paths live by design. message_id minted Python-side if Go omits (open-Q #1). Import-verified. |
|
| 30 |
+
| **2 — v2 + regroup** | Tools list → `/api/v1/tools/list` | Sofhia | ✅ | Renamed route `GET /api/v1/tools` → `GET /api/v1/tools/list` ([tools.py:133](src/api/v1/tools.py:133)). |
|
| 31 |
+
| **2 — v2 + regroup** | FE: slash menu = `/help` only; report = right-side button | Mentor (FE) | ⬜ | Coordination note, not Python work. |
|
| 32 |
+
| **3 — tools + obs** | Finish `help` so it actually **calls** (not just lists) + test | Sofhia | ⬜ | Mentor: help currently only lists tools. Core #2 after chat. |
|
| 33 |
+
| **3 — tools + obs** | Observability **scratchpad** (decorator) accumulating in the chat agent | Rifqi + Sofhia | ⬜ | Capture planning / tool I/O / sources during the run; flush one record on `done`. |
|
| 34 |
+
| **3 — tools + obs** | Audit `report_inputs` — covers planning + tool I/O + source? add cols / new store | Rifqi | ⬜ | **Rec:** dedicated provenance store = 1 JSONB row per message (logical 3 sections); keep Langfuse for engineering. |
|
| 35 |
+
| **3 — tools + obs** | Build `GET /api/v1/observability` (one merged response) | Rifqi | ⬜ | Intent-based source rules (greeting/help = none; retrieve = required). Richness path-dependent (full planning only on slow path). |
|
| 36 |
+
| **3 — tools + obs** | Keep stream **text-only**; observability is a separate parallel call | Rifqi | ⬜ | Per mentor — don't slow the stream. |
|
| 37 |
+
| **3 — tools + obs** | Resolve `message_id` correlation (stream ↔ observability) with Harry | Rifqi ↔ Harry | ⬜ | **Rec:** Go mints `message_id`, passes in chat request, Python echoes on `done`. |
|
| 38 |
+
| **4 — biz questions** | Get Go folder; confirm `business_questions` in create-analysis (max 5); sync Python | Harry/Mentor → Rifqi | ⬜ | Go currently missing the field ("lagi difixing" — i.e. being fixed). Python already models objective + business_questions. |
|
| 39 |
+
| **deferred** | Report formats: PPT (preferred) / PDF / infographic on download | — | ⏸️ | MD is fine for the FE preview stage now. |
|
| 40 |
+
| **deferred** | Charts (Plotly→JSON) + images tables | — | ⏸️ | Carried from §4 #26/#27. |
|
| 41 |
+
|
| 42 |
+
**Next up:** Phase 2 Python work is **done** (chat→v2 `analysis_id`; `help`/`report`/`list` regrouped
|
| 43 |
+
under `/api/v1/tools/`). Remaining: **Phase 3** — the observability scratchpad + `GET /api/v1/observability`
|
| 44 |
+
(shape already specced in the contract), then **Phase 4** (business questions, Go-blocked).
|
| 45 |
|
| 46 |
---
|
| 47 |
|
REPO_STATUS.md
CHANGED
|
@@ -2,13 +2,27 @@
|
|
| 2 |
|
| 3 |
**Audience:** teammates onboarding onto the Python repo (`Agentic-Service-Data-Eyond-Catalog`).
|
| 4 |
**Scope:** what the code does **right now** (branch `pr/4`, ticket KM-652). Describes current state only — no roadmap or to-dos.
|
| 5 |
-
**Snapshot date:** 2026-06-25.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
> This file is grounded in the source, not the older design docs. Where the two
|
| 8 |
> disagree, the code wins — see [§11 Doc-vs-code](#11-where-the-older-docs-are-stale).
|
| 9 |
> `REPO_CONTEXT.md` / `ARCHITECTURE.md` are the original Phase-2 design docs and are
|
| 10 |
> stale on the router, joins, and the analysis/report stack.
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
## 1. The product in one paragraph
|
|
@@ -31,9 +45,13 @@ streaming.
|
|
| 31 |
| Repo | Role | We edit? |
|
| 32 |
|---|---|---|
|
| 33 |
| **Python** — `Agentic-Service-Data-Eyond-Catalog` (this repo) | The agentic LLM service: router, gate, skills, slow analytical path, structured query engine, unstructured RAG, report generation, analysis-session state. FastAPI + async SQLAlchemy + LangChain + Azure GPT-4o. | **Yes — the only repo we edit.** |
|
| 34 |
-
| **Go** — `Orchestrator-Agent-Service` | Gateway / data plane:
|
| 35 |
| **FE** — `E2E-Frontend-Data-Eyond` | React/Vite SPA. Talks to Go for everything and to Python only for chat streaming. | Reference only. |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
Shared infra: **Postgres** (app tables + `data_catalog` jsonb + PGVector `langchain_pg_embedding`), **Azure Blob**, and (Python-only) **Redis**.
|
| 38 |
|
| 39 |
---
|
|
@@ -59,6 +77,10 @@ Tests live locally and are gitignored. Run with `./.venv/Scripts/python.exe -m p
|
|
| 59 |
Entry: `POST /api/v1/chat/stream` (`src/api/v1/chat.py`) → `ChatHandler.handle(...)`
|
| 60 |
(`src/agents/chat_handler.py`). One shared `ChatHandler` per process keeps the Azure clients warm.
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
```
|
| 63 |
POST /chat/stream { user_id, room_id, message }
|
| 64 |
│ (analysis_id == room_id — one session = one analysis = one chat room)
|
|
@@ -131,6 +153,11 @@ Two facts to internalise:
|
|
| 131 |
|
| 132 |
## 7. API surface (this repo, all under `/api/v1`)
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
| Endpoint | Purpose | Caller |
|
| 135 |
|---|---|---|
|
| 136 |
| `POST /chat/stream` | Main chat SSE (router → dispatch) | FE → Go → Python (the only FE→Python call today) |
|
|
@@ -153,10 +180,18 @@ unless `SKIP_INIT_DB=true`.
|
|
| 153 |
| `documents`, `databases` | uploads + DB creds (Fernet-encrypted) | Go ingestion | executor cred resolution |
|
| 154 |
| `data_catalog` | per-user jsonb `Catalog` (Source → Table → Column) | Go ingestion / Python pipeline | CatalogReader, planner, tools |
|
| 155 |
| `langchain_pg_embedding` | PGVector document chunks | Go ingestion | DocumentRetriever |
|
| 156 |
-
| `analysis_records` | jsonb `AnalysisRecord`, one per slow-path run | slow path | ReportGenerator, report readiness |
|
| 157 |
-
| `
|
| 158 |
-
| `reports` *(dedorch)* | uuid, `title` + markdown `content` + `version` | ReportStore | report API |
|
| 159 |
-
| `data_sources` *(dedorch)* | per-analysis binding; `reference_id` = catalog source_id | `/analysis/create` | structured-flow scoping, report appendix |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
**Catalog shape** (the jsonb in `data_catalog`):
|
| 162 |
`Catalog → Source[ {source_id, source_type ∈ schema|tabular|unstructured, name, location_ref} → Table[ {table_id, name, row_count, foreign_keys[]} → Column[ {column_id, name, data_type, nullable, pii_flag, sample_values|null, stats} ] ] ]`. PII columns have `sample_values: null` so real values never enter prompts.
|
|
@@ -268,25 +303,40 @@ copies disagree with the current code on:
|
|
| 268 |
| Analysis / report / gate / slow path | "Phase 2 spine only" | All built and present |
|
| 269 |
| `analysis_id` | open question | resolved: **`analysis_id == room_id`** |
|
| 270 |
| Report source | (newer invariant) "from records, never chat history" | confirmed: generator reads `AnalysisRecord`s |
|
|
|
|
| 271 |
|
| 272 |
---
|
| 273 |
|
| 274 |
## 12. dedorch migration — current state
|
| 275 |
|
| 276 |
The Python DB is moving from `dataeyond` → **dedorch** (Go owns dedorch migrations; Python is
|
| 277 |
-
consumer-only).
|
| 278 |
-
|
| 279 |
-
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
---
|
| 292 |
|
|
|
|
| 2 |
|
| 3 |
**Audience:** teammates onboarding onto the Python repo (`Agentic-Service-Data-Eyond-Catalog`).
|
| 4 |
**Scope:** what the code does **right now** (branch `pr/4`, ticket KM-652). Describes current state only — no roadmap or to-dos.
|
| 5 |
+
**Snapshot date:** 2026-06-25. **Cross-repo update 2026-06-29:** §2/§8/§11/§12 re-verified against
|
| 6 |
+
the **Go source** (`Orchestrator-Agent-Service`), not its docs. The Go service has moved well past its
|
| 7 |
+
own (uncommitted, stale) design docs: it now hosts the **dedorch SQL migrations** in-repo and a full
|
| 8 |
+
**`/api/v1/analyses` + `/api/v1/skills`** REST surface. Go does **not** call Python yet — those skills
|
| 9 |
+
are placeholders (see §12).
|
| 10 |
|
| 11 |
> This file is grounded in the source, not the older design docs. Where the two
|
| 12 |
> disagree, the code wins — see [§11 Doc-vs-code](#11-where-the-older-docs-are-stale).
|
| 13 |
> `REPO_CONTEXT.md` / `ARCHITECTURE.md` are the original Phase-2 design docs and are
|
| 14 |
> stale on the router, joins, and the analysis/report stack.
|
| 15 |
|
| 16 |
+
> 🚧 **Direction update 2026-06-30 (pr/5 — DECIDED · IN PROGRESS).** The 30 June checkpoint locked a
|
| 17 |
+
> restructure (contract: [API_ENDPOINTS_RESTRUCTURE.md](API_ENDPOINTS_RESTRUCTURE.md); live tracker:
|
| 18 |
+
> [DEV_PLAN §0](DEV_PLAN.md)). **Python is becoming a generation/AI-only service** — Go owns the full
|
| 19 |
+
> analysis lifecycle *and* the data-plane endpoints. Scope:
|
| 20 |
+
> - **Unwired from `main` + Swagger** (router files kept, *not* deleted): `analysis` CRUD, `room`, `db_client`, `document`, `data_catalog`, `users`/login. **✅ DONE — KM-686, commit `0b2d678`** (so the §7 rows for these are now commented out of `main.py`).
|
| 21 |
+
> - **AI surface that stays live:** `chat` → **`POST /api/v2/chat/stream`** (explicit **`analysis_id`**, not `room_id`); the skills regroup under **`/api/v1/tools/`** (`list` · `help` · `report`); plus a **new `GET /api/v1/observability`** (Responsible-AI provenance per answer, backed by a provenance store — shape TBD). **⬜ pending.**
|
| 22 |
+
> - **Only `chat/stream` moves to `/api/v2`;** everything else stays `/api/v1`.
|
| 23 |
+
>
|
| 24 |
+
> §2/§4/§7 below still describe the **pre-restructure wiring** except the unwire above, which has landed.
|
| 25 |
+
|
| 26 |
---
|
| 27 |
|
| 28 |
## 1. The product in one paragraph
|
|
|
|
| 45 |
| Repo | Role | We edit? |
|
| 46 |
|---|---|---|
|
| 47 |
| **Python** — `Agentic-Service-Data-Eyond-Catalog` (this repo) | The agentic LLM service: router, gate, skills, slow analytical path, structured query engine, unstructured RAG, report generation, analysis-session state. FastAPI + async SQLAlchemy + LangChain + Azure GPT-4o. | **Yes — the only repo we edit.** |
|
| 48 |
+
| **Go** — `Orchestrator-Agent-Service` | Gateway / data plane: auth/JWT, documents (Azure Blob + CSV/XLSX→Parquet + embeddings), database_clients (Fernet creds), **catalog ingestion** (moved into Go, KM-578/590), **all dedorch SQL migrations** (now embedded in the Go repo: `internal/repository/postgres/migrations/0001–0004`), and the **full analysis-lifecycle REST surface** (`/api/v1/analyses` CRUD + messages + reports, `/api/v1/skills`). The **interview agent and chat-rooms are deprecated → HTTP 410** (`internal/api/deprecation.go`). | Reference only. |
|
| 49 |
| **FE** — `E2E-Frontend-Data-Eyond` | React/Vite SPA. Talks to Go for everything and to Python only for chat streaming. | Reference only. |
|
| 50 |
|
| 51 |
+
> **» pr/5 (decided; unwire landed — KM-686):** Python's non-AI endpoints (analysis CRUD, `room`, `document`,
|
| 52 |
+
> `db_client`, `data_catalog`, `users`/login) have been **unwired** — Python keeps only the
|
| 53 |
+
> generation/AI surface (chat, tools: `help`/`report`/`list`, observability). See the Direction-update banner.
|
| 54 |
+
|
| 55 |
Shared infra: **Postgres** (app tables + `data_catalog` jsonb + PGVector `langchain_pg_embedding`), **Azure Blob**, and (Python-only) **Redis**.
|
| 56 |
|
| 57 |
---
|
|
|
|
| 77 |
Entry: `POST /api/v1/chat/stream` (`src/api/v1/chat.py`) → `ChatHandler.handle(...)`
|
| 78 |
(`src/agents/chat_handler.py`). One shared `ChatHandler` per process keeps the Azure clients warm.
|
| 79 |
|
| 80 |
+
> **» pr/5:** this endpoint moves to **`POST /api/v2/chat/stream`** with an explicit **`analysis_id`**
|
| 81 |
+
> field (replacing `room_id`), and the observability detail (planning / tool I/O / sources) moves out of
|
| 82 |
+
> the stream to a separate `GET /api/v1/observability` call. See the Direction-update banner.
|
| 83 |
+
|
| 84 |
```
|
| 85 |
POST /chat/stream { user_id, room_id, message }
|
| 86 |
│ (analysis_id == room_id — one session = one analysis = one chat room)
|
|
|
|
| 153 |
|
| 154 |
## 7. API surface (this repo, all under `/api/v1`)
|
| 155 |
|
| 156 |
+
> **» pr/5 (decided; unwire landed in KM-686, the rest in progress):** chat → `/api/v2/chat/stream` (`analysis_id`); `/tools` splits
|
| 157 |
+
> into `/tools/list` + `/tools/help` + `/tools/report`; new `/api/v1/observability`; and the
|
| 158 |
+
> analysis-CRUD / `room` / `users` / `document` / `db_client` / `data_catalog` rows are unwired from
|
| 159 |
+
> `main` + Swagger. See the Direction-update banner.
|
| 160 |
+
|
| 161 |
| Endpoint | Purpose | Caller |
|
| 162 |
|---|---|---|
|
| 163 |
| `POST /chat/stream` | Main chat SSE (router → dispatch) | FE → Go → Python (the only FE→Python call today) |
|
|
|
|
| 180 |
| `documents`, `databases` | uploads + DB creds (Fernet-encrypted) | Go ingestion | executor cred resolution |
|
| 181 |
| `data_catalog` | per-user jsonb `Catalog` (Source → Table → Column) | Go ingestion / Python pipeline | CatalogReader, planner, tools |
|
| 182 |
| `langchain_pg_embedding` | PGVector document chunks | Go ingestion | DocumentRetriever |
|
| 183 |
+
| `report_inputs` *(was `analysis_records`)* | jsonb `AnalysisRecord`, one per slow-path run; **Python-owned** | slow path | ReportGenerator, report readiness |
|
| 184 |
+
| `analyses` *(dedorch, plural)* | uuid `id`, `user_id`, `analysis_title`, `objective`, `business_questions` jsonb, `status` (active\|inactive), `data_bind`(+`data_bind_version`), `report_id`, `report_collection` — **defined by Go migrations**; `problem_statement`/`problem_validated`/`owner_id` already **dropped** there (`0003`/`0004`) | Go `/api/v1/analyses`; Python state store | gate (no-op), Help, report |
|
| 185 |
+
| `reports` *(dedorch)* | uuid, `analysis_id`, `user_id`, `title` + markdown `content` + `version` (UNIQUE per analysis) | Go + Python ReportStore | report API |
|
| 186 |
+
| `data_sources` *(dedorch)* | per-analysis binding; `reference_id` = catalog source_id; `type ∈ document\|database` | Go `/analyses/{id}/data-bind` (+ Python `/analysis/create`) | structured-flow scoping, report appendix |
|
| 187 |
+
| `analyses_messages` *(dedorch)* | the analysis chat room (`role ∈ user\|ai`); replaces deprecated `rooms`/`chat_messages` | Go `/analyses/{id}/messages` | Python chat path **not yet migrated here** (§12) |
|
| 188 |
+
|
| 189 |
+
> ⚠️ **Python ORM ↔ dedorch drift (verified 2026-06-29).** Python's `AnalysisStateRow` + `state_store.py`
|
| 190 |
+
> still model **`problem_statement` / `problem_validated`** and do **not** carry `objective` /
|
| 191 |
+
> `business_questions`, but the Go migrations have already dropped the former and added the latter.
|
| 192 |
+
> Pre-cutover this is harmless (Python runs `create_all` on its own copy); **post-`SKIP_INIT_DB`**, when
|
| 193 |
+
> Python reads dedorch directly, ORM column selection on the dropped columns will break. Reconcile the
|
| 194 |
+
> Python model before the connection-string cutover.
|
| 195 |
|
| 196 |
**Catalog shape** (the jsonb in `data_catalog`):
|
| 197 |
`Catalog → Source[ {source_id, source_type ∈ schema|tabular|unstructured, name, location_ref} → Table[ {table_id, name, row_count, foreign_keys[]} → Column[ {column_id, name, data_type, nullable, pii_flag, sample_values|null, stats} ] ] ]`. PII columns have `sample_values: null` so real values never enter prompts.
|
|
|
|
| 303 |
| Analysis / report / gate / slow path | "Phase 2 spine only" | All built and present |
|
| 304 |
| `analysis_id` | open question | resolved: **`analysis_id == room_id`** |
|
| 305 |
| Report source | (newer invariant) "from records, never chat history" | confirmed: generator reads `AnalysisRecord`s |
|
| 306 |
+
| Go service scope | "interview agent + ingestion; dedorch migrations live outside the repos" | Go now hosts the **dedorch migrations in-repo** + a full **`/api/v1/analyses` + `/api/v1/skills`** REST surface; interview/rooms **deprecated (410)**. (Go's own `PROJECT_SUMMARY.md`/`REPO_CONTEXT.md` are uncommitted + stale.) |
|
| 307 |
|
| 308 |
---
|
| 309 |
|
| 310 |
## 12. dedorch migration — current state
|
| 311 |
|
| 312 |
The Python DB is moving from `dataeyond` → **dedorch** (Go owns dedorch migrations; Python is
|
| 313 |
+
consumer-only). State **re-verified against the Go source 2026-06-29**:
|
| 314 |
+
|
| 315 |
+
- **The dedorch migrations now live IN the Go repo** — embedded SQL at
|
| 316 |
+
`internal/repository/postgres/migrations/0001_create_core_schema.sql … 0004_replace_chat_with_analysis_scope.sql`,
|
| 317 |
+
run on startup by `RunMigrations`. (This corrects the earlier note that the migrations were
|
| 318 |
+
invisible / asserted only by Python docstrings.) The full schema is now readable there.
|
| 319 |
+
- **Go owns the analysis family end-to-end.** `analyses` / `analyses_messages` / `reports` /
|
| 320 |
+
`data_sources` / `message_sources` / `data_catalog` are created by Go migrations and served by a
|
| 321 |
+
full REST surface: `internal/api/analysis.go` (CRUD + `data-bind` w/ optimistic `expected_version`
|
| 322 |
+
+ messages + reports) and `internal/api/skills.go`. `analyses` already has the **pivot shape**
|
| 323 |
+
(`objective` + `business_questions`, `status`, `data_bind`/`_version`, `report_collection`) and has
|
| 324 |
+
**dropped** `problem_statement`/`problem_validated`/`owner_id`. Migration `0004` renames the legacy
|
| 325 |
+
`rooms`/`chat_messages`/`interview_*` tables to `zdeprecated_*`.
|
| 326 |
+
- **`report_inputs`** (the slow-path structured output, formerly `analysis_records`) stays
|
| 327 |
+
**Python-owned**; its finalized schema goes to Harry so the dedorch migration creates it post-cutover.
|
| 328 |
+
- The connection-string cutover (paired with `SKIP_INIT_DB`) **has not happened yet**; Python still
|
| 329 |
+
runs `create_all` on its own models until then.
|
| 330 |
+
|
| 331 |
+
**⚠️ Integration gap (verified — the big one).** Go's `/api/v1/analyses` and `/api/v1/skills`
|
| 332 |
+
(`help` / `report`) are **placeholders that return dummy data** — the `SendMessage` / `GenerateReport`
|
| 333 |
+
handlers and the skills handler explicitly note *"placeholder integrasi backend agentic … will be
|
| 334 |
+
replaced by the external skills service."* **Go currently never calls Python's `/chat/stream`,
|
| 335 |
+
`/report`, or any skill** (no outbound HTTP to the agentic service exists in the Go source). So today
|
| 336 |
+
there are **two parallel, unconnected analysis stacks**: Go's self-contained placeholder lifecycle
|
| 337 |
+
(gate: ≥3 user messages; AI replies are canned) and Python's real agentic spine (router → slow path →
|
| 338 |
+
records-based report; floor: ≥1 `analyze_*` success). Wiring Go → Python is the open integration work
|
| 339 |
+
(DEV_PLAN #7/#18/#25), plus reconciling the two different report gates.
|
| 340 |
|
| 341 |
---
|
| 342 |
|
main.py
CHANGED
|
@@ -7,15 +7,20 @@ from src.middlewares.logging import configure_logging, get_logger
|
|
| 7 |
from src.middlewares.cors import add_cors_middleware
|
| 8 |
from src.middlewares.rate_limit import limiter, _rate_limit_exceeded_handler
|
| 9 |
from slowapi.errors import RateLimitExceeded
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from src.api.v1.chat import router as chat_router
|
| 12 |
-
from src.api.v1.room import router as room_router
|
| 13 |
-
from src.api.v1.users import router as users_router
|
| 14 |
-
from src.api.v1.db_client import router as db_client_router
|
| 15 |
-
from src.api.v1.data_catalog import router as data_catalog_router
|
| 16 |
from src.api.v1.report import router as report_router
|
| 17 |
-
from src.api.v1.analysis import router as analysis_router
|
| 18 |
from src.api.v1.tools import router as tools_router
|
|
|
|
|
|
|
| 19 |
from src.db.postgres.init_db import init_db
|
| 20 |
import os
|
| 21 |
import uvicorn
|
|
@@ -50,15 +55,18 @@ app.state.limiter = limiter
|
|
| 50 |
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
| 51 |
|
| 52 |
# Include routers
|
| 53 |
-
|
| 54 |
-
app.include_router(
|
| 55 |
-
app.include_router(
|
| 56 |
-
app.include_router(
|
| 57 |
-
app.include_router(db_client_router)
|
| 58 |
-
app.include_router(data_catalog_router)
|
|
|
|
|
|
|
| 59 |
app.include_router(report_router)
|
| 60 |
-
app.include_router(analysis_router)
|
| 61 |
app.include_router(tools_router)
|
|
|
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
@app.get("/")
|
|
|
|
| 7 |
from src.middlewares.cors import add_cors_middleware
|
| 8 |
from src.middlewares.rate_limit import limiter, _rate_limit_exceeded_handler
|
| 9 |
from slowapi.errors import RateLimitExceeded
|
| 10 |
+
# --- pr/5 Phase 1: unwire non-AI routers (Go owns these now). ---
|
| 11 |
+
# Routers below are commented out, NOT deleted. The router files stay alive;
|
| 12 |
+
# they're just not mounted, so they also disappear from Swagger.
|
| 13 |
+
# from src.api.v1.document import router as document_router # unwired: Go handles documents
|
| 14 |
+
# from src.api.v1.room import router as room_router # unwired: replaced by analysis_id
|
| 15 |
+
# from src.api.v1.users import router as users_router # unwired: login moved off Python
|
| 16 |
+
# from src.api.v1.db_client import router as db_client_router # unwired: Go registers DB client
|
| 17 |
+
# from src.api.v1.data_catalog import router as data_catalog_router # unwired: Go handles the catalog
|
| 18 |
+
# from src.api.v1.analysis import router as analysis_router # unwired: Go owns create/update analysis
|
| 19 |
from src.api.v1.chat import router as chat_router
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
from src.api.v1.report import router as report_router
|
|
|
|
| 21 |
from src.api.v1.tools import router as tools_router
|
| 22 |
+
from src.api.v1.help import router as help_router # pr/5 Phase 2: dedicated /tools/help
|
| 23 |
+
from src.api.v2.chat import router as chat_v2_router # pr/5 Phase 2: v2 chat pilot (analysis_id)
|
| 24 |
from src.db.postgres.init_db import init_db
|
| 25 |
import os
|
| 26 |
import uvicorn
|
|
|
|
| 55 |
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
| 56 |
|
| 57 |
# Include routers
|
| 58 |
+
# --- pr/5 Phase 1: AI-only surface. Non-AI routers unwired (Go owns them). ---
|
| 59 |
+
# app.include_router(users_router) # unwired: login moved off Python
|
| 60 |
+
# app.include_router(document_router) # unwired: Go handles documents
|
| 61 |
+
# app.include_router(room_router) # unwired: replaced by analysis_id
|
| 62 |
+
# app.include_router(db_client_router) # unwired: Go registers DB client
|
| 63 |
+
# app.include_router(data_catalog_router) # unwired: Go handles the catalog
|
| 64 |
+
# app.include_router(analysis_router) # unwired: Go owns create/update analysis
|
| 65 |
+
app.include_router(chat_router) # v1 chat/stream (room_id) — kept until FE moves to v2
|
| 66 |
app.include_router(report_router)
|
|
|
|
| 67 |
app.include_router(tools_router)
|
| 68 |
+
app.include_router(help_router)
|
| 69 |
+
app.include_router(chat_v2_router) # pr/5 Phase 2: POST /api/v2/chat/stream (analysis_id)
|
| 70 |
|
| 71 |
|
| 72 |
@app.get("/")
|
src/agents/chat_handler.py
CHANGED
|
@@ -227,6 +227,55 @@ class ChatHandler:
|
|
| 227 |
# Public entry
|
| 228 |
# ------------------------------------------------------------------
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
async def handle(
|
| 231 |
self,
|
| 232 |
message: str,
|
|
|
|
| 227 |
# Public entry
|
| 228 |
# ------------------------------------------------------------------
|
| 229 |
|
| 230 |
+
async def stream_help(
|
| 231 |
+
self,
|
| 232 |
+
user_id: str,
|
| 233 |
+
analysis_id: str | None,
|
| 234 |
+
history: list[BaseMessage] | None = None,
|
| 235 |
+
message: str | None = None,
|
| 236 |
+
) -> AsyncIterator[dict[str, Any]]:
|
| 237 |
+
"""Deterministic `help` dispatch for the dedicated `/api/v1/tools/help` endpoint.
|
| 238 |
+
|
| 239 |
+
Bypasses the intent router — the slash command IS the intent, so there is no
|
| 240 |
+
classify round-trip and no misclassification risk. Streams the same guidance as
|
| 241 |
+
the `help` branch of `handle()`, reusing the warm HelpAgent + state store.
|
| 242 |
+
|
| 243 |
+
Emits SSE-style events: `sources` (always `[]` — help never references
|
| 244 |
+
documents), `chunk`*, then `done` (data left empty; the endpoint stamps the
|
| 245 |
+
`message_id`). On failure, yields a terminal `error` event.
|
| 246 |
+
"""
|
| 247 |
+
# Load (or lazily create) the analysis state; fail closed to a not-validated
|
| 248 |
+
# stub so help degrades gracefully on a missing row / read error / legacy id.
|
| 249 |
+
state: AnalysisState | None = None
|
| 250 |
+
if analysis_id:
|
| 251 |
+
try:
|
| 252 |
+
state = await self._get_state_store().ensure(analysis_id, user_id)
|
| 253 |
+
except Exception as e: # noqa: BLE001 — never block help on a state read
|
| 254 |
+
logger.warning("help state ensure failed", analysis_id=analysis_id, error=str(e))
|
| 255 |
+
if state is None:
|
| 256 |
+
state = await self._load_analysis_state(analysis_id)
|
| 257 |
+
|
| 258 |
+
# report_ready (seam #5): deterministic, never-throws (fails closed to
|
| 259 |
+
# not-ready) — the HelpAgent guard only offers generate_report when ready.
|
| 260 |
+
from .report.readiness import is_report_ready
|
| 261 |
+
|
| 262 |
+
report_ready = await is_report_ready(analysis_id, state)
|
| 263 |
+
|
| 264 |
+
yield {"event": "sources", "data": json.dumps([])}
|
| 265 |
+
try:
|
| 266 |
+
async for token in self._get_help_agent().astream(
|
| 267 |
+
state,
|
| 268 |
+
history=history,
|
| 269 |
+
message=message,
|
| 270 |
+
report_ready=report_ready,
|
| 271 |
+
):
|
| 272 |
+
yield {"event": "chunk", "data": token}
|
| 273 |
+
except Exception as e: # noqa: BLE001
|
| 274 |
+
logger.error("help streaming failed", user_id=user_id, error=str(e))
|
| 275 |
+
yield {"event": "error", "data": f"Help generation failed: {e}"}
|
| 276 |
+
return
|
| 277 |
+
yield {"event": "done", "data": ""}
|
| 278 |
+
|
| 279 |
async def handle(
|
| 280 |
self,
|
| 281 |
message: str,
|
src/api/v1/help.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""`help` skill endpoint — dedicated, deterministic dispatch (pr/5 Phase 2).
|
| 2 |
+
|
| 3 |
+
`POST /api/v1/tools/help` streams state-aware next-step guidance over SSE. Unlike v1
|
| 4 |
+
— where `/help` was reachable only by letting the intent router classify a chat
|
| 5 |
+
message — this endpoint dispatches Help directly: the slash command IS the intent, so
|
| 6 |
+
there is no router round-trip and no misclassification risk (contract open-Q #2,
|
| 7 |
+
resolved in favour of a dedicated endpoint).
|
| 8 |
+
|
| 9 |
+
Contract: `API_ENDPOINTS_RESTRUCTURE.md` §3. The SSE shape mirrors `/chat/stream`, but
|
| 10 |
+
help never references documents, so `sources` is always `[]` and there are no `status`
|
| 11 |
+
pings. The `done` event carries the assistant `message_id` for observability
|
| 12 |
+
correlation (§7).
|
| 13 |
+
|
| 14 |
+
Python is generative-only (06-25 direction): this endpoint does NOT persist the turn —
|
| 15 |
+
Go owns writes to `analyses_messages`. It only generates + streams.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import json
|
| 19 |
+
import uuid
|
| 20 |
+
from typing import Optional
|
| 21 |
+
|
| 22 |
+
from fastapi import APIRouter, Depends, HTTPException
|
| 23 |
+
from pydantic import BaseModel
|
| 24 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 25 |
+
from sse_starlette.sse import EventSourceResponse
|
| 26 |
+
|
| 27 |
+
# Reuse the warm, process-shared ChatHandler (keeps HelpAgent + Azure clients warm)
|
| 28 |
+
# and the same history loader the chat endpoint uses. `load_history` reads by
|
| 29 |
+
# `analysis_id` (== room_id today); it moves to `analyses_messages` with DEV_PLAN #25.
|
| 30 |
+
from src.api.v1.chat import _chat_handler, load_history
|
| 31 |
+
from src.db.postgres.connection import get_db
|
| 32 |
+
from src.middlewares.logging import get_logger, log_execution
|
| 33 |
+
|
| 34 |
+
logger = get_logger("help_api")
|
| 35 |
+
|
| 36 |
+
router = APIRouter(prefix="/api/v1/tools", tags=["Tools"])
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class HelpRequest(BaseModel):
|
| 40 |
+
user_id: str
|
| 41 |
+
analysis_id: str
|
| 42 |
+
# ⚠️ open-Q #1: Go may mint the assistant turn id and pass it; if absent, Python
|
| 43 |
+
# mints one and returns it on `done` so the FE can call /observability in parallel.
|
| 44 |
+
message_id: Optional[str] = None
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@router.post("/help")
|
| 48 |
+
@log_execution(logger)
|
| 49 |
+
async def help_stream(request: HelpRequest, db: AsyncSession = Depends(get_db)):
|
| 50 |
+
"""Stream state-aware next-step guidance (deterministic `/help` dispatch).
|
| 51 |
+
|
| 52 |
+
SSE event sequence:
|
| 53 |
+
1. sources — always `[]` (help never references documents)
|
| 54 |
+
2. chunk — text fragments of the guidance
|
| 55 |
+
3. done — `{"message_id": "..."}` for the observability lookup
|
| 56 |
+
"""
|
| 57 |
+
message_id = request.message_id or f"msg_{uuid.uuid4().hex[:12]}"
|
| 58 |
+
try:
|
| 59 |
+
history = await load_history(db, request.analysis_id, limit=10)
|
| 60 |
+
|
| 61 |
+
async def stream_response():
|
| 62 |
+
async for event in _chat_handler.stream_help(
|
| 63 |
+
request.user_id,
|
| 64 |
+
request.analysis_id,
|
| 65 |
+
history=history,
|
| 66 |
+
message=None,
|
| 67 |
+
):
|
| 68 |
+
if event["event"] == "done":
|
| 69 |
+
# Stamp the turn id so the FE can fetch /observability for it.
|
| 70 |
+
yield {"event": "done", "data": json.dumps({"message_id": message_id})}
|
| 71 |
+
elif event["event"] == "error":
|
| 72 |
+
yield event
|
| 73 |
+
return
|
| 74 |
+
else:
|
| 75 |
+
# `sources` ([]) and `chunk` pass through unchanged.
|
| 76 |
+
yield event
|
| 77 |
+
|
| 78 |
+
return EventSourceResponse(stream_response())
|
| 79 |
+
|
| 80 |
+
except Exception as e:
|
| 81 |
+
logger.error("Help failed", error=str(e))
|
| 82 |
+
raise HTTPException(status_code=500, detail=f"Help failed: {str(e)}")
|
src/api/v1/report.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
"""Report API (KM-644) — the dedicated "Generate Report" surface.
|
| 2 |
|
| 3 |
-
NOT a chat route. The frontend button calls these endpoints directly:
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
GET /report/{analysis_id}
|
|
|
|
| 7 |
|
| 8 |
Generation reads persisted AnalysisRecords + Problem Statement, makes one LLM call
|
| 9 |
(the executive summary), and persists an immutable versioned artifact. The
|
|
@@ -27,7 +28,10 @@ from src.models.api.report import ReportVersionEntry
|
|
| 27 |
|
| 28 |
logger = get_logger("report_api")
|
| 29 |
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
_generator = ReportGenerator()
|
| 33 |
_store = ReportStore()
|
|
|
|
| 1 |
"""Report API (KM-644) — the dedicated "Generate Report" surface.
|
| 2 |
|
| 3 |
+
NOT a chat route. The frontend button calls these endpoints directly (pr/5: regrouped
|
| 4 |
+
under /tools — Go owns the analysis lifecycle, Python only generates):
|
| 5 |
+
POST /api/v1/tools/report generate a new version for a session
|
| 6 |
+
GET /api/v1/tools/report/{analysis_id} list a session's report versions
|
| 7 |
+
GET /api/v1/tools/report/{analysis_id}/{ver} fetch one version
|
| 8 |
|
| 9 |
Generation reads persisted AnalysisRecords + Problem Statement, makes one LLM call
|
| 10 |
(the executive summary), and persists an immutable versioned artifact. The
|
|
|
|
| 28 |
|
| 29 |
logger = get_logger("report_api")
|
| 30 |
|
| 31 |
+
# pr/5 Phase 2: report regrouped under the tools surface (path → /api/v1/tools/report).
|
| 32 |
+
# Prefix change moves all three routes at once; same functionality, new home. The
|
| 33 |
+
# "Tools" tag groups it with /tools/list + /tools/help in Swagger.
|
| 34 |
+
router = APIRouter(prefix="/api/v1/tools", tags=["Tools"])
|
| 35 |
|
| 36 |
_generator = ReportGenerator()
|
| 37 |
_store = ReportStore()
|
src/api/v1/tools.py
CHANGED
|
@@ -130,11 +130,14 @@ _COMMAND_CATALOG: list[CommandResponse] = [
|
|
| 130 |
]
|
| 131 |
|
| 132 |
|
| 133 |
-
@router.get("/tools", response_model=ListToolsResponse)
|
| 134 |
@log_execution(logger)
|
| 135 |
async def list_tools() -> ListToolsResponse:
|
| 136 |
"""List the user-invocable slash-command catalog (skills + tools).
|
| 137 |
|
| 138 |
Static per deployment — safe for the Golang backend to cache.
|
|
|
|
|
|
|
|
|
|
| 139 |
"""
|
| 140 |
return ListToolsResponse(count=len(_COMMAND_CATALOG), tools=_COMMAND_CATALOG)
|
|
|
|
| 130 |
]
|
| 131 |
|
| 132 |
|
| 133 |
+
@router.get("/tools/list", response_model=ListToolsResponse)
|
| 134 |
@log_execution(logger)
|
| 135 |
async def list_tools() -> ListToolsResponse:
|
| 136 |
"""List the user-invocable slash-command catalog (skills + tools).
|
| 137 |
|
| 138 |
Static per deployment — safe for the Golang backend to cache.
|
| 139 |
+
|
| 140 |
+
pr/5 Phase 2: moved from `GET /api/v1/tools` to `GET /api/v1/tools/list` so the
|
| 141 |
+
skills group is `/tools/list` · `/tools/help` · `/tools/report`.
|
| 142 |
"""
|
| 143 |
return ListToolsResponse(count=len(_COMMAND_CATALOG), tools=_COMMAND_CATALOG)
|
src/api/v2/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""API v2 (pr/5). Only the chat pilot lives here — keyed on `analysis_id` instead of
|
| 2 |
+
`room_id`. The tools group (`/tools/list|help|report`) and observability stay on v1.
|
| 3 |
+
See API_ENDPOINTS_RESTRUCTURE.md §1.
|
| 4 |
+
"""
|
src/api/v2/chat.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Chat endpoint — v2 pilot (pr/5 Phase 2).
|
| 2 |
+
|
| 3 |
+
`POST /api/v2/chat/stream` is the v2 of the only FE→Python call. It is identical to
|
| 4 |
+
`POST /api/v1/chat/stream` except:
|
| 5 |
+
- the request carries an explicit **`analysis_id`** (replacing v1's `room_id`). The
|
| 6 |
+
two are the same session id today (`analysis_id == room_id`), so the warm,
|
| 7 |
+
process-shared `ChatHandler` and the v1 cache/history helpers are reused unchanged.
|
| 8 |
+
- the `done` event carries the assistant **`message_id`** (minted Python-side if Go
|
| 9 |
+
omits it — contract open-Q #1), so the FE can fetch `/api/v1/observability` for the
|
| 10 |
+
turn in parallel with the stream.
|
| 11 |
+
|
| 12 |
+
Only chat moves to v2; the tools group + observability stay on `/api/v1` (contract:
|
| 13 |
+
API_ENDPOINTS_RESTRUCTURE.md §1).
|
| 14 |
+
|
| 15 |
+
⚠️ Persistence (transitional). This mirrors v1: it still loads and saves turn history via the
|
| 16 |
+
analysis-keyed message tables so multi-turn context works in the playground. Moving the
|
| 17 |
+
read/write to Go-owned `analyses_messages` (and making Python read-only) is DEV_PLAN #25.
|
| 18 |
+
Note Sofhia's `/tools/help` is already generative-only — align chat with that under #25.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import json
|
| 22 |
+
import uuid
|
| 23 |
+
from typing import Any
|
| 24 |
+
|
| 25 |
+
from fastapi import APIRouter, Depends, HTTPException
|
| 26 |
+
from pydantic import BaseModel
|
| 27 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 28 |
+
from sse_starlette.sse import EventSourceResponse
|
| 29 |
+
|
| 30 |
+
# Reuse the v1 chat machinery verbatim (warm ChatHandler + cache/history helpers) so
|
| 31 |
+
# v2 stays a thin field-rename over the same logic. Importing the module-private helpers
|
| 32 |
+
# is the established pattern here (src/api/v1/help.py imports `_chat_handler` the same way).
|
| 33 |
+
from src.api.v1.chat import (
|
| 34 |
+
_CACHEABLE_INTENTS,
|
| 35 |
+
_chat_cache_key,
|
| 36 |
+
_chat_handler,
|
| 37 |
+
_fast_intent,
|
| 38 |
+
cache_response,
|
| 39 |
+
get_cached_response,
|
| 40 |
+
load_history,
|
| 41 |
+
save_messages,
|
| 42 |
+
)
|
| 43 |
+
from src.db.postgres.connection import get_db
|
| 44 |
+
from src.db.redis.connection import get_redis
|
| 45 |
+
from src.middlewares.logging import get_logger, log_execution
|
| 46 |
+
|
| 47 |
+
logger = get_logger("chat_api_v2")
|
| 48 |
+
|
| 49 |
+
router = APIRouter(prefix="/api/v2", tags=["Chat"])
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _mint_message_id(provided: str | None) -> str:
|
| 53 |
+
"""Use Go's assistant turn id when provided; else mint one (contract open-Q #1)."""
|
| 54 |
+
return provided or f"msg_{uuid.uuid4().hex[:12]}"
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class ChatRequest(BaseModel):
|
| 58 |
+
user_id: str
|
| 59 |
+
analysis_id: str
|
| 60 |
+
message: str
|
| 61 |
+
# ⚠️ open-Q #1: Go may mint + pass the assistant turn id; if absent we mint one and
|
| 62 |
+
# echo it on `done` so the FE can correlate /observability with this answer.
|
| 63 |
+
message_id: str | None = None
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
@router.post("/chat/stream")
|
| 67 |
+
@log_execution(logger)
|
| 68 |
+
async def chat_stream(request: ChatRequest, db: AsyncSession = Depends(get_db)):
|
| 69 |
+
"""Chat endpoint with streaming response (v2 — keyed on `analysis_id`).
|
| 70 |
+
|
| 71 |
+
SSE event sequence:
|
| 72 |
+
1. sources — JSON array of source refs (table for structured; deduped
|
| 73 |
+
document_id/page_label for unstructured; [] for chat/help/error)
|
| 74 |
+
2. status — slow-path progress pings (optional)
|
| 75 |
+
3. chunk — text fragments of the answer
|
| 76 |
+
4. done — {"message_id": "..."} for the observability lookup
|
| 77 |
+
"""
|
| 78 |
+
analysis_id = request.analysis_id
|
| 79 |
+
message_id = _mint_message_id(request.message_id)
|
| 80 |
+
redis = await get_redis()
|
| 81 |
+
cache_key = _chat_cache_key(analysis_id, request.user_id, request.message)
|
| 82 |
+
|
| 83 |
+
# v2 `done` always carries the turn id (v1 sent an empty `done`).
|
| 84 |
+
done_event = {"event": "done", "data": json.dumps({"message_id": message_id})}
|
| 85 |
+
|
| 86 |
+
# Redis cache hit (stateless `chat` intent only).
|
| 87 |
+
cached = await get_cached_response(redis, cache_key)
|
| 88 |
+
logger.info("cache check", cache_key=cache_key, cache_hit=cached is not None)
|
| 89 |
+
if cached:
|
| 90 |
+
logger.info("Returning cached response")
|
| 91 |
+
cached_text = cached["response"]
|
| 92 |
+
cached_sources = cached["sources"]
|
| 93 |
+
await save_messages(db, analysis_id, request.message, cached_text, sources=cached_sources)
|
| 94 |
+
|
| 95 |
+
async def stream_cached():
|
| 96 |
+
yield {"event": "sources", "data": json.dumps(cached_sources)}
|
| 97 |
+
for i in range(0, len(cached_text), 50):
|
| 98 |
+
yield {"event": "chunk", "data": cached_text[i:i + 50]}
|
| 99 |
+
yield done_event
|
| 100 |
+
|
| 101 |
+
return EventSourceResponse(stream_cached())
|
| 102 |
+
|
| 103 |
+
try:
|
| 104 |
+
# Fast intent: greetings/farewells bypass the LLM entirely.
|
| 105 |
+
direct = _fast_intent(request.message)
|
| 106 |
+
if direct:
|
| 107 |
+
await cache_response(redis, cache_key, direct, sources=[])
|
| 108 |
+
await save_messages(db, analysis_id, request.message, direct, sources=[])
|
| 109 |
+
|
| 110 |
+
async def stream_direct():
|
| 111 |
+
yield {"event": "sources", "data": json.dumps([])}
|
| 112 |
+
yield {"event": "chunk", "data": direct}
|
| 113 |
+
yield done_event
|
| 114 |
+
|
| 115 |
+
return EventSourceResponse(stream_direct())
|
| 116 |
+
|
| 117 |
+
history = await load_history(db, analysis_id, limit=10)
|
| 118 |
+
handler = _chat_handler
|
| 119 |
+
|
| 120 |
+
async def stream_response():
|
| 121 |
+
logger.info("stream_response started", analysis_id=analysis_id, user_id=request.user_id)
|
| 122 |
+
full_response = ""
|
| 123 |
+
sources: list[dict[str, Any]] = []
|
| 124 |
+
effective_intent: str | None = None
|
| 125 |
+
async for event in handler.handle(
|
| 126 |
+
request.message, request.user_id, history, analysis_id=analysis_id
|
| 127 |
+
):
|
| 128 |
+
if event["event"] == "intent":
|
| 129 |
+
# consumed internally (not forwarded); gates caching below.
|
| 130 |
+
try:
|
| 131 |
+
effective_intent = json.loads(event["data"]).get("intent")
|
| 132 |
+
except (TypeError, ValueError, AttributeError):
|
| 133 |
+
effective_intent = None
|
| 134 |
+
elif event["event"] == "sources":
|
| 135 |
+
try:
|
| 136 |
+
sources = json.loads(event["data"]) or []
|
| 137 |
+
except (TypeError, ValueError):
|
| 138 |
+
sources = []
|
| 139 |
+
yield event
|
| 140 |
+
elif event["event"] == "chunk":
|
| 141 |
+
full_response += event["data"]
|
| 142 |
+
yield event
|
| 143 |
+
elif event["event"] == "done":
|
| 144 |
+
# Only cache stateless `chat` replies (see _CACHEABLE_INTENTS).
|
| 145 |
+
if effective_intent in _CACHEABLE_INTENTS:
|
| 146 |
+
await cache_response(redis, cache_key, full_response, sources=sources)
|
| 147 |
+
try:
|
| 148 |
+
await save_messages(
|
| 149 |
+
db, analysis_id, request.message, full_response, sources=sources
|
| 150 |
+
)
|
| 151 |
+
except Exception as e:
|
| 152 |
+
logger.error("save_messages failed", analysis_id=analysis_id, error=str(e))
|
| 153 |
+
yield done_event
|
| 154 |
+
elif event["event"] == "status":
|
| 155 |
+
# slow-path progress: forward so the client shows activity.
|
| 156 |
+
yield event
|
| 157 |
+
elif event["event"] == "error":
|
| 158 |
+
yield event
|
| 159 |
+
return
|
| 160 |
+
|
| 161 |
+
return EventSourceResponse(stream_response())
|
| 162 |
+
|
| 163 |
+
except Exception as e:
|
| 164 |
+
logger.error("Chat failed", error=str(e))
|
| 165 |
+
raise HTTPException(status_code=500, detail=f"Chat failed: {str(e)}") from e
|