HariLogicgo commited on
Commit
b26b1fd
·
1 Parent(s): 83668a5

tested ready for deployment

Browse files
.gitignore ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual Environments
7
+ venv/
8
+ env/
9
+ .venv/
10
+ .env
11
+
12
+ # Distribution / Packaging
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+
53
+ # Jupyter Notebook
54
+ .ipynb_checkpoints
55
+
56
+ # IDEs
57
+ .vscode/
58
+ .idea/
59
+
60
+ # OS specific
61
+ .DS_Store
62
+ Thumbs.db
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ ENV PYTHONDONTWRITEBYTECODE=1 \
4
+ PYTHONUNBUFFERED=1 \
5
+ PORT=7860
6
+
7
+ WORKDIR /app
8
+
9
+ RUN apt-get update \
10
+ && apt-get install -y --no-install-recommends build-essential \
11
+ && rm -rf /var/lib/apt/lists/*
12
+
13
+ COPY requirements.txt .
14
+ RUN pip install --no-cache-dir -r requirements.txt
15
+
16
+ COPY . .
17
+
18
+ EXPOSE 7860
19
+
20
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/routes.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends
2
+
3
+ from app.dependencies import get_category_service
4
+ from app.schemas.categories import CategorizeRequest, CategoryPrediction
5
+ from app.services.autocategorizer import AutoCategoryService
6
+
7
+ router = APIRouter()
8
+
9
+
10
+ @router.post("/categorize", response_model=CategoryPrediction, summary="Categorize a transaction note")
11
+ async def categorize_transaction(
12
+ payload: CategorizeRequest,
13
+ service: AutoCategoryService = Depends(get_category_service),
14
+ ) -> CategoryPrediction:
15
+ return await service.categorize(payload.notes)
app/core/__init__.py ADDED
File without changes
app/core/config.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+
3
+ from pydantic import Field
4
+ from pydantic_settings import BaseSettings, SettingsConfigDict
5
+
6
+
7
+ class Settings(BaseSettings):
8
+ model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8", extra="ignore")
9
+
10
+ mongo_uri: str = Field(..., alias="MONGO_URI")
11
+ mongo_db: str = Field("wallet_sync", alias="MONGO_DB")
12
+ mongo_collection: str = Field("autocategory", alias="MONGO_COLLECTION")
13
+
14
+ openai_api_key: str = Field(..., alias="OPENAI_API_KEY")
15
+ openai_model: str = Field("gpt-4o-mini", alias="OPENAI_MODEL")
16
+
17
+ category_cache_ttl_seconds: int = Field(300, alias="CATEGORY_CACHE_TTL")
18
+
19
+
20
+ @lru_cache
21
+ def get_settings() -> Settings:
22
+ return Settings()
23
+
24
+
25
+ settings = get_settings()
app/core/openai_client.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from openai import AsyncOpenAI
2
+
3
+ from app.core.config import settings
4
+
5
+ openai_client = AsyncOpenAI(api_key=settings.openai_api_key)
app/db/__init__.py ADDED
File without changes
app/db/mongo.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorCollection, AsyncIOMotorDatabase
6
+
7
+ from app.core.config import settings
8
+
9
+ _mongo_client: Optional[AsyncIOMotorClient] = None
10
+
11
+
12
+ async def connect_to_mongo() -> None:
13
+ """Initialize the MongoDB client once during application startup."""
14
+ global _mongo_client
15
+ if _mongo_client is not None:
16
+ return
17
+
18
+ client = AsyncIOMotorClient(settings.mongo_uri, serverSelectionTimeoutMS=5000)
19
+ # Trigger a server selection to fail fast if the URI/config is invalid.
20
+ await client.server_info()
21
+ _mongo_client = client
22
+
23
+
24
+ async def close_mongo_connection() -> None:
25
+ global _mongo_client
26
+ if _mongo_client is None:
27
+ return
28
+ _mongo_client.close()
29
+ _mongo_client = None
30
+
31
+
32
+ def get_client() -> AsyncIOMotorClient:
33
+ if _mongo_client is None:
34
+ raise RuntimeError("MongoDB client is not initialized. Wait for startup to finish.")
35
+ return _mongo_client
36
+
37
+
38
+ def get_database() -> AsyncIOMotorDatabase:
39
+ return get_client()[settings.mongo_db]
40
+
41
+
42
+ def get_autocategory_collection() -> AsyncIOMotorCollection:
43
+ return get_database()[settings.mongo_collection]
app/dependencies.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+
3
+ from app.core.config import settings
4
+ from app.core.openai_client import openai_client
5
+ from app.db.mongo import get_autocategory_collection
6
+ from app.services.autocategorizer import AutoCategoryService
7
+
8
+
9
+ @lru_cache
10
+ def _get_service() -> AutoCategoryService:
11
+ return AutoCategoryService(
12
+ collection_getter=get_autocategory_collection,
13
+ openai_client=openai_client,
14
+ model=settings.openai_model,
15
+ cache_ttl_seconds=settings.category_cache_ttl_seconds,
16
+ )
17
+
18
+
19
+ def get_category_service() -> AutoCategoryService:
20
+ return _get_service()
app/main.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from contextlib import asynccontextmanager
2
+
3
+ from fastapi import FastAPI
4
+
5
+ from app.api.routes import router as api_router
6
+ from app.db.mongo import close_mongo_connection, connect_to_mongo
7
+
8
+
9
+ @asynccontextmanager
10
+ async def lifespan(_: FastAPI):
11
+ await connect_to_mongo()
12
+ try:
13
+ yield
14
+ finally:
15
+ await close_mongo_connection()
16
+
17
+
18
+ app = FastAPI(
19
+ title="Auto Categorizer API",
20
+ version="1.0.0",
21
+ lifespan=lifespan,
22
+ )
23
+
24
+ app.include_router(api_router, prefix="/api/v1")
25
+
26
+
27
+ @app.get("/health", tags=["Health"])
28
+ async def health_check() -> dict[str, str]:
29
+ return {"status": "ok"}
app/models/__init__.py ADDED
File without changes
app/schemas/__init__.py ADDED
File without changes
app/schemas/categories.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
+ class CategorizeRequest(BaseModel):
5
+ notes: str = Field(..., min_length=1, description="Full transaction note.")
6
+
7
+
8
+ class CategoryPrediction(BaseModel):
9
+ title: str = Field(..., description="High-level category title.")
10
+ subcategory: str = Field(..., description="Specific subcategory chosen by the model.")
app/services/__init__.py ADDED
File without changes
app/services/autocategorizer.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import asyncio
5
+ import json
6
+ import re
7
+ import time
8
+ from typing import Callable, Dict, List, Optional
9
+
10
+ from fastapi import HTTPException
11
+ from motor.motor_asyncio import AsyncIOMotorCollection
12
+ from openai import AsyncOpenAI
13
+
14
+ from app.core.config import settings
15
+ from app.schemas.categories import CategoryPrediction
16
+
17
+
18
+ class AutoCategoryService:
19
+ """Classifies transaction notes into the closest Mongo-backed category."""
20
+
21
+ def __init__(
22
+ self,
23
+ collection_getter: Callable[[], AsyncIOMotorCollection],
24
+ openai_client: AsyncOpenAI,
25
+ model: str,
26
+ cache_ttl_seconds: int,
27
+ ) -> None:
28
+ self._collection_getter = collection_getter
29
+ self._openai_client = openai_client
30
+ self._model = model
31
+ self._cache_ttl_seconds = cache_ttl_seconds
32
+ self._cached_categories: List[Dict[str, object]] | None = None
33
+ self._last_loaded: float = 0.0
34
+ self._lock = asyncio.Lock()
35
+
36
+ def _collection(self) -> AsyncIOMotorCollection:
37
+ return self._collection_getter()
38
+
39
+ async def categorize(self, notes: str) -> CategoryPrediction:
40
+ categories = await self._get_categories()
41
+ if not categories:
42
+ raise HTTPException(status_code=500, detail="No categories configured.")
43
+
44
+ formatted_categories = self._format_categories(categories)
45
+ user_prompt = (
46
+ "Transaction note:\n"
47
+ f"{notes}\n\n"
48
+ "Available categories and subcategories:\n"
49
+ f"{formatted_categories}\n\n"
50
+ "Respond with the exact title and subcategory from the list above."
51
+ )
52
+
53
+ request_payload = dict(
54
+ model=self._model,
55
+ input=[
56
+ {
57
+ "role": "system",
58
+ "content": (
59
+ "You classify financial transactions into the closest category. "
60
+ "Only use the provided title and subcategory options. "
61
+ "Output valid JSON with keys 'title' and 'subcategory'."
62
+ ),
63
+ },
64
+ {"role": "user", "content": [{"type": "input_text", "text": user_prompt}]},
65
+ ],
66
+ )
67
+
68
+ try:
69
+ response = await self._openai_client.responses.create(
70
+ response_format={"type": "json_object"},
71
+ **request_payload,
72
+ )
73
+ except TypeError as exc:
74
+ # Older openai-python clients (pre 1.3x) do not yet support response_format.
75
+ if "response_format" not in str(exc):
76
+ raise
77
+ response = await self._openai_client.responses.create(**request_payload)
78
+
79
+ try:
80
+ payload = self._parse_response_payload(response)
81
+ except ValueError as exc:
82
+ raise HTTPException(status_code=502, detail="Failed to parse model output.") from exc
83
+
84
+ title = payload.get("title")
85
+ subcategory = payload.get("subcategory")
86
+ if not isinstance(title, str) or not isinstance(subcategory, str):
87
+ raise HTTPException(status_code=502, detail="Model response missing category fields.")
88
+
89
+ return CategoryPrediction(title=title.strip(), subcategory=subcategory.strip())
90
+
91
+ def _parse_response_payload(self, response) -> Dict[str, object]:
92
+ raw_text = self._extract_response_text(response)
93
+ if not raw_text:
94
+ raise ValueError("Model response did not contain text content.")
95
+
96
+ cleaned = self._strip_code_fence(raw_text)
97
+ candidates = [cleaned]
98
+
99
+ json_snippet = self._extract_json_snippet(cleaned)
100
+ if json_snippet and json_snippet not in candidates:
101
+ candidates.append(json_snippet)
102
+
103
+ for candidate in candidates:
104
+ for parser in (self._try_parse_json, self._try_parse_literal_dict, self._try_parse_key_values):
105
+ payload = parser(candidate)
106
+ if payload:
107
+ return payload
108
+
109
+ raise ValueError("Unable to coerce model response into a payload.")
110
+
111
+ @staticmethod
112
+ def _extract_response_text(response) -> str:
113
+ text = getattr(response, "output_text", "") or ""
114
+ if isinstance(text, str) and text.strip():
115
+ return text.strip()
116
+
117
+ outputs = getattr(response, "output", []) or []
118
+ for output in outputs:
119
+ contents = getattr(output, "content", []) or []
120
+ for content in contents:
121
+ value = getattr(content, "text", None)
122
+ if isinstance(value, str) and value.strip():
123
+ return value.strip()
124
+
125
+ return ""
126
+
127
+ @staticmethod
128
+ def _strip_code_fence(raw_text: str) -> str:
129
+ text = raw_text.strip()
130
+ if text.startswith("```") and text.endswith("```"):
131
+ lines = text.split("\n")
132
+ # Drop first and last fence line
133
+ if len(lines) >= 2:
134
+ text = "\n".join(lines[1:-1]).strip()
135
+ return text
136
+
137
+ @staticmethod
138
+ def _extract_json_snippet(raw_text: str) -> Optional[str]:
139
+ start = raw_text.find("{")
140
+ end = raw_text.rfind("}")
141
+ if start == -1 or end == -1 or end <= start:
142
+ return None
143
+ return raw_text[start : end + 1]
144
+
145
+ @staticmethod
146
+ def _try_parse_json(raw_text: str) -> Optional[Dict[str, object]]:
147
+ text = raw_text.strip()
148
+ if not text:
149
+ return None
150
+ try:
151
+ payload = json.loads(text)
152
+ except json.JSONDecodeError:
153
+ return None
154
+ return payload if isinstance(payload, dict) else None
155
+
156
+ @staticmethod
157
+ def _try_parse_literal_dict(raw_text: str) -> Optional[Dict[str, object]]:
158
+ try:
159
+ payload = ast.literal_eval(raw_text)
160
+ except (SyntaxError, ValueError):
161
+ return None
162
+ return payload if isinstance(payload, dict) else None
163
+
164
+ @staticmethod
165
+ def _try_parse_key_values(raw_text: str) -> Optional[Dict[str, object]]:
166
+ title: Optional[str] = None
167
+ subcategory: Optional[str] = None
168
+ for chunk in re.split(r"[\n;,]+", raw_text):
169
+ if ":" in chunk:
170
+ key, value = chunk.split(":", 1)
171
+ elif "=" in chunk:
172
+ key, value = chunk.split("=", 1)
173
+ else:
174
+ continue
175
+ key_normalized = key.strip().lower()
176
+ value_clean = value.strip().strip('"\'')
177
+ if not value_clean:
178
+ continue
179
+ if key_normalized in {"title", "category"}:
180
+ title = value_clean
181
+ elif key_normalized in {"subcategory", "sub_category", "sub"}:
182
+ subcategory = value_clean
183
+
184
+ if title and subcategory:
185
+ return {"title": title, "subcategory": subcategory}
186
+
187
+ return None
188
+
189
+ async def _get_categories(self) -> List[Dict[str, object]]:
190
+ async with self._lock:
191
+ now = time.monotonic()
192
+ if self._cached_categories and (now - self._last_loaded) < self._cache_ttl_seconds:
193
+ return self._cached_categories
194
+
195
+ categories: List[Dict[str, object]] = []
196
+ async for doc in self._collection().find({}, {"_id": 0}):
197
+ categories.append(
198
+ {
199
+ "title": doc.get("title", ""),
200
+ "subcategories": doc.get("subcategories", []),
201
+ }
202
+ )
203
+
204
+ self._cached_categories = categories
205
+ self._last_loaded = now
206
+ return categories
207
+
208
+ @staticmethod
209
+ def _format_categories(categories: List[Dict[str, object]]) -> str:
210
+ lines = []
211
+ for category in categories:
212
+ subs = category.get("subcategories") or []
213
+ subs_text = ", ".join(subs) if subs else "Unspecified"
214
+ lines.append(f"- {category.get('title', 'Unknown')}: {subs_text}")
215
+ return "\n".join(lines)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi==0.111.0
2
+ uvicorn[standard]==0.30.1
3
+ motor==3.4.0
4
+ pymongo>=4.6.0,<4.7.0
5
+ pydantic-settings==2.2.1
6
+ python-dotenv==1.0.1
7
+ openai>=1.30.1,<2.0.0