jeanbaptdzd commited on
Commit
6851411
·
0 Parent(s):

feat: FastAPI vLLM service with OpenAI-compatible endpoints and PRIIPs extractor

Browse files
.gitignore ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ .pytest_cache/
3
+ .venv/
4
+ *.pyc
5
+ *.pyo
6
+ *.pyd
7
+ .DS_Store
8
+ .env
9
+ .env.*
10
+ dist/
11
+ build/
12
+ *.log
13
+ *.sqlite3
14
+ *.db
15
+
README.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PRIIPs LLM Service (vLLM + FastAPI)
2
+
3
+ OpenAI-compatible API and PRIIPs extractor powered by `DragonLLM/LLM-Pro-Finance-Small` via vLLM.
4
+
5
+ ## Setup
6
+
7
+ 1. Create and activate a virtualenv (optional)
8
+ 2. Install dependencies:
9
+
10
+ ```bash
11
+ pip install -r requirements.txt
12
+ ```
13
+
14
+ 3. Configure environment:
15
+
16
+ - Copy `.env.example` to `.env` and adjust values
17
+ - Ensure your vLLM server is running and has `HUGGING_FACE_HUB_TOKEN` set so it can pull the model
18
+
19
+ Start vLLM (example):
20
+
21
+ ```bash
22
+ HUGGING_FACE_HUB_TOKEN=$HF_TOKEN \
23
+ python -m vllm.entrypoints.openai.api_server \
24
+ --model DragonLLM/LLM-Pro-Finance-Small \
25
+ --host 0.0.0.0 --port 8000
26
+ ```
27
+
28
+ Run the FastAPI app:
29
+
30
+ ```bash
31
+ uvicorn app.main:app --reload --port 8080
32
+ ```
33
+
34
+ ## OpenAI-compatible API
35
+
36
+ - GET `/v1/models`
37
+ - POST `/v1/chat/completions` (supports `stream=true` when vLLM streaming is enabled)
38
+
39
+ Point PydanticAI/DSPy to `http://localhost:8080/v1` as the base.
40
+
41
+ ## PRIIPs extraction
42
+
43
+ - POST `/extract-priips` with body:
44
+
45
+ ```json
46
+ {
47
+ "sources": ["https://example.com/doc.pdf"],
48
+ "options": {"language": "en", "ocr": false}
49
+ }
50
+ ```
51
+
52
+ Returns structured JSON validated by Pydantic.
app/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # empty package marker
2
+
app/config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings
2
+
3
+
4
class Settings(BaseSettings):
    """Runtime configuration, loaded from environment variables / a .env file."""

    # Base URL of the upstream vLLM OpenAI-compatible server.
    vllm_base_url: str = "http://localhost:8000/v1"
    # Default model name forwarded to vLLM when a request does not name one.
    model: str = "DragonLLM/LLM-Pro-Finance-Small"
    # Optional shared secret; when set, every request must present it
    # (enforced by the api_key_guard middleware).
    service_api_key: str | None = None
    log_level: str = "info"

    # NOTE(review): the inner ``Config`` class is pydantic v1 style and is
    # deprecated under pydantic-settings v2 in favor of
    # ``model_config = SettingsConfigDict(...)`` — still functional, though.
    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"


# Module-level singleton read by the rest of the app.
settings = Settings()
16
+
17
+
app/main.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from app.middleware import api_key_guard

from app.routers import openai_api, extract


app = FastAPI(title="PRIIPs LLM Service (vLLM)")

# Optional API-key guard; it is a no-op unless SERVICE_API_KEY is configured
# (see app.config). Registration order relative to routers is irrelevant in
# FastAPI — middleware always wraps the whole app.
app.middleware("http")(api_key_guard)

# Wire up the OpenAI-compatible surface and the PRIIPs extractor.
app.include_router(openai_api.router, prefix="/v1")
app.include_router(extract.router)


@app.get("/")
async def root():
    """Liveness probe."""
    return {"status": "ok"}
app/middleware.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Request, HTTPException
2
+ from fastapi.responses import JSONResponse
3
+
4
+ from app.config import settings
5
+
6
+
7
async def api_key_guard(request: Request, call_next):
    """HTTP middleware enforcing an optional shared API key.

    When ``settings.service_api_key`` is unset the guard is a no-op.
    Otherwise the key may arrive via an ``x-api-key`` header or an
    ``Authorization: Bearer <key>`` header; anything else gets a 401.
    """
    import secrets

    if not settings.service_api_key:
        return await call_next(request)
    supplied = request.headers.get("x-api-key") or request.headers.get("authorization") or ""
    # removeprefix only drops a *leading* "Bearer ", unlike the original
    # str.replace, which corrupted any key containing that substring.
    supplied = supplied.removeprefix("Bearer ").strip()
    # Constant-time comparison avoids leaking key bytes through timing.
    if supplied and secrets.compare_digest(supplied, settings.service_api_key):
        return await call_next(request)
    return JSONResponse({"error": "unauthorized"}, status_code=401)
14
+
15
+
app/models/openai.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Literal, Optional
2
+ from pydantic import BaseModel, Field
3
+
4
+
5
Role = Literal["system", "user", "assistant", "tool"]


class Message(BaseModel):
    """One chat turn in OpenAI wire format."""

    role: Role
    content: str


class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible subset)."""

    model: str
    messages: List[Message]
    temperature: Optional[float] = 0.2
    # The original used Field(alias="max_tokens"), but an alias identical to
    # the field name is a no-op; a plain default behaves the same on both
    # validation and serialization.
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False


class ChoiceMessage(BaseModel):
    """Assistant message inside a completion choice."""

    role: Literal["assistant"]
    content: Optional[str] = None


class Choice(BaseModel):
    """One generated completion."""

    index: int
    message: ChoiceMessage
    finish_reason: Optional[str] = None


class Usage(BaseModel):
    """Token accounting reported by the backend."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class ChatCompletionResponse(BaseModel):
    """Response body mirroring OpenAI's chat.completion object."""

    id: str
    object: Literal["chat.completion"] = "chat.completion"
    created: int
    model: str
    choices: List[Choice]
    usage: Optional[Usage] = None
+
app/models/priips.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+ from pydantic import BaseModel
3
+
4
+
5
class PerformanceScenario(BaseModel):
    """One row of the KID performance-scenarios table."""

    name: str
    description: Optional[str] = None
    return_pct: Optional[float] = None  # scenario return, in percent


class Costs(BaseModel):
    """Cost figures, each expressed in percent of the investment."""

    entry_cost_pct: Optional[float] = None
    ongoing_cost_pct: Optional[float] = None
    exit_cost_pct: Optional[float] = None


class PriipsFields(BaseModel):
    """Structured fields extracted from a PRIIPs KID document.

    All fields are optional: extraction is best-effort and missing values
    are left as ``None`` rather than failing validation.
    """

    product_name: Optional[str] = None
    manufacturer: Optional[str] = None
    isin: Optional[str] = None
    sri: Optional[int] = None  # summary risk indicator; extraction prompt expects 1-7
    recommended_holding_period: Optional[str] = None
    costs: Optional[Costs] = None
    performance_scenarios: Optional[List[PerformanceScenario]] = None
    date: Optional[str] = None
    language: Optional[str] = None
    source_url: Optional[str] = None  # populated by the service after extraction


class ExtractRequest(BaseModel):
    """Request body for POST /extract-priips."""

    sources: List[str]  # URLs or local file paths of PDF documents
    options: Optional[dict] = None  # free-form options (e.g. language, ocr)


class ExtractResult(BaseModel):
    """Per-source outcome; ``data`` is set on success, ``error`` on failure."""

    source: str
    success: bool
    data: Optional[PriipsFields] = None
    error: Optional[str] = None
+
app/providers/base.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Protocol, Dict, Any
2
+
3
+
4
class LLMProvider(Protocol):
    """Structural interface an LLM backend module must satisfy."""

    async def list_models(self) -> Dict[str, Any]:
        """Return the backend's model listing (OpenAI ``/models`` shape)."""
        ...

    async def chat(self, payload: Dict[str, Any], stream: bool = False) -> Any:
        """Send a chat-completions payload; return type depends on ``stream``."""
        ...
+
11
+
app/providers/vllm.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import httpx
2
+ from app.config import settings
3
+
4
+
5
async def list_models():
    """Proxy the upstream vLLM ``GET /models`` listing."""
    url = f"{settings.vllm_base_url}/models"
    async with httpx.AsyncClient(timeout=30) as client:
        response = await client.get(url)
    response.raise_for_status()
    return response.json()
10
+
11
+
12
async def chat(payload, stream: bool = False):
    """POST *payload* to the vLLM ``/chat/completions`` endpoint.

    Non-streaming: returns the decoded JSON body.
    Streaming: returns an open ``httpx.Response`` whose lines the caller can
    iterate with ``aiter_lines()``; the caller must ``aclose()`` it when done.
    """
    url = f"{settings.vllm_base_url}/chat/completions"
    if stream:
        # BUG FIX: ``client.stream(...)`` returns an async context manager,
        # not an awaitable, so ``await client.stream(...)`` raised TypeError;
        # worse, the original ``async with`` closed the client before the
        # caller could consume the stream. Build and send the request
        # explicitly so the connection stays open for the caller.
        client = httpx.AsyncClient(timeout=None)
        try:
            request = client.build_request("POST", url, json=payload)
            response = await client.send(request, stream=True)
            response.raise_for_status()
        except Exception:
            await client.aclose()
            raise
        # NOTE(review): the client itself stays open for the lifetime of the
        # streamed response; closing the response releases the connection.
        return response
    async with httpx.AsyncClient(timeout=None) as client:
        response = await client.post(url, json=payload)
        response.raise_for_status()
        return response.json()
23
+
24
+
app/routers/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # package
2
+
app/routers/extract.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter

from app.models.priips import ExtractRequest
from app.services import extract_service


router = APIRouter()


@router.post("/extract-priips")
async def extract_priips(body: ExtractRequest):
    """Run the PRIIPs extractor over every source listed in the request."""
    results = await extract_service.extract(body)
    return results
13
+
14
+
app/routers/openai_api.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from typing import Any, Dict
3
+
4
+ from fastapi import APIRouter
5
+ from fastapi.responses import StreamingResponse, JSONResponse
6
+
7
+ from app.config import settings
8
+ from app.models.openai import ChatCompletionRequest
9
+ from app.services import chat_service
10
+
11
+
12
router = APIRouter()


@router.get("/models")
async def list_models():
    """OpenAI-compatible model listing, proxied from vLLM."""
    return await chat_service.list_models()


@router.post("/chat/completions")
async def chat_completions(body: ChatCompletionRequest):
    """OpenAI-compatible chat completions, proxied to vLLM.

    Streams Server-Sent Events when ``stream=true``; otherwise passes the
    upstream JSON response through unchanged.
    """
    payload: Dict[str, Any] = {
        "model": body.model or settings.model,
        "messages": [m.model_dump() for m in body.messages],
        "temperature": body.temperature,
        **({"max_tokens": body.max_tokens} if body.max_tokens is not None else {}),
        "stream": body.stream or False,
    }

    if body.stream:
        upstream = await chat_service.chat(payload, stream=True)

        async def event_stream():
            # Re-emit upstream lines as SSE frames, adding the "data:" prefix
            # where missing. BUG FIX: always release the upstream streaming
            # response, even if the client disconnects mid-stream.
            try:
                async for line in upstream.aiter_lines():
                    if not line:
                        continue
                    if line.startswith("data:"):
                        yield f"{line}\n\n"
                    else:
                        yield f"data: {line}\n\n"
            finally:
                await upstream.aclose()

        return StreamingResponse(event_stream(), media_type="text/event-stream")

    data = await chat_service.chat(payload, stream=False)
    # vLLM already returns the OpenAI-compatible schema; pass it through.
    return JSONResponse(content=data)
48
+
49
+
app/services/chat_service.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict
2
+
3
+ from app.providers import vllm as provider
4
+
5
+
6
async def list_models() -> Dict[str, Any]:
    """Return the provider's model listing (OpenAI ``/models`` shape)."""
    return await provider.list_models()


async def chat(payload: Dict[str, Any], stream: bool = False):
    """Forward a chat-completions payload to the configured provider.

    ``stream`` is passed straight through; the return type then depends on
    the provider (a streaming response vs. decoded JSON).
    """
    return await provider.chat(payload, stream=stream)
12
+
13
+
app/services/extract_service.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+ from typing import List
4
+
5
+ from app.config import settings
6
+ from app.models.priips import ExtractRequest, ExtractResult, PriipsFields
7
+ from app.providers import vllm
8
+ from app.utils.pdf import download_to_tmp, extract_text_from_pdf
9
+ from app.utils.json_guard import try_parse_json
10
+
11
+
12
# Field names advertised to the model; only the key names reach the prompt
# (the per-field type hints in the original schema dict were never used).
_PRIIPS_KEYS = [
    "product_name",
    "manufacturer",
    "isin",
    "sri",
    "recommended_holding_period",
    "costs",
    "performance_scenarios",
    "date",
    "language",
    "source_url",
]


def build_prompt(text: str) -> str:
    """Build the PRIIPs extraction prompt, truncating the document to 15k chars."""
    instruction = (
        "You are an expert financial document parser. "
        "Extract the requested PRIIPs fields as STRICT JSON only, no extra text. "
        f"JSON schema keys: {_PRIIPS_KEYS}."
    )
    return f"{instruction}\n\nDocument:\n{text[:15000]}"
37
+
38
+
39
async def process_source(src: str) -> ExtractResult:
    """Fetch one source (URL or local path), extract its text, and ask the
    LLM for PRIIPs fields.

    Never raises: every failure is folded into an ``ExtractResult`` with
    ``success=False`` so one bad source cannot abort a batch.
    """
    try:
        path: Path
        if src.lower().startswith("http"):
            # Remote documents are downloaded into a local .tmp directory.
            path = await download_to_tmp(src, Path(".tmp"))
        else:
            path = Path(src)
        text = extract_text_from_pdf(path)
        prompt = build_prompt(text)

        payload = {
            "model": settings.model,
            "messages": [
                {"role": "system", "content": "You output JSON only."},
                {"role": "user", "content": prompt},
            ],
            # Low temperature: extraction should be as deterministic as possible.
            "temperature": 0.1,
            "max_tokens": 800,
            "stream": False,
        }
        data = await vllm.chat(payload, stream=False)

        # vLLM returns the OpenAI chat schema; pull the first choice's text,
        # defaulting to "" at every level so malformed responses don't raise.
        content = (
            data.get("choices", [{}])[0]
            .get("message", {})
            .get("content", "")
            if isinstance(data, dict)
            else ""
        )
        ok, parsed = try_parse_json(content)
        if not ok:
            # On failure ``parsed`` carries the JSON error message.
            return ExtractResult(source=src, success=False, error=str(parsed))

        model_data = PriipsFields(**parsed)
        model_data.source_url = src
        return ExtractResult(source=src, success=True, data=model_data)
    except Exception as e:
        # Deliberate best-effort: report the failure per source.
        return ExtractResult(source=src, success=False, error=str(e))
+ return ExtractResult(source=src, success=False, error=str(e))
78
+
79
+
80
async def extract(req: ExtractRequest) -> List[ExtractResult]:
    """Process each requested source sequentially, preserving input order."""
    return [await process_source(src) for src in req.sources]
+ return results
85
+
86
+
app/utils/json_guard.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Any, Tuple
3
+
4
+
5
def try_parse_json(text: str) -> Tuple[bool, Any]:
    """Parse *text* as JSON, tolerating markdown code fences.

    Returns ``(True, parsed)`` on success or ``(False, error_message)``.

    BUG FIX: the original repair, ``strip("`\\n ")``, removed the backticks
    but left the fence language tag in place, so output wrapped as
    ```json ... ``` still failed to parse. Now the fences and an optional
    language tag on the opening fence line are removed explicitly.
    """
    try:
        return True, json.loads(text)
    except Exception:
        candidate = text.strip()
        if (
            candidate.startswith("```")
            and candidate.endswith("```")
            and len(candidate) >= 6
        ):
            candidate = candidate[3:-3]
            # Drop an optional language tag (e.g. "json") on the fence line.
            first, sep, rest = candidate.partition("\n")
            if sep and first.strip().isalpha():
                candidate = rest
        try:
            return True, json.loads(candidate.strip())
        except Exception as e:
            return False, str(e)
17
+
18
+
app/utils/pdf.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from typing import Optional
3
+
4
+ import httpx
5
+ import fitz # PyMuPDF
6
+
7
+
8
async def download_to_tmp(url: str, tmp_dir: Path) -> Path:
    """Download *url* into *tmp_dir* and return the local file path.

    BUG FIX: the filename is now derived from the URL *path* component, so
    query strings and fragments (``doc.pdf?token=...``) no longer leak into
    the filename; falls back to ``document.pdf`` when the path has no name.
    Redirects are followed, since document links frequently redirect.
    """
    from urllib.parse import urlsplit

    tmp_dir.mkdir(parents=True, exist_ok=True)
    filename = Path(urlsplit(url).path).name or "document.pdf"
    target = tmp_dir / filename
    async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
        r = await client.get(url)
        r.raise_for_status()
        target.write_bytes(r.content)
    return target
17
+
18
+
19
def extract_text_from_pdf(path: Path) -> str:
    """Concatenate the plain text of every page in the PDF at *path*."""
    doc = fitz.open(path)
    try:
        pages = [page.get_text("text") for page in doc]
    finally:
        # Always release the document handle, even if extraction fails.
        doc.close()
    return "\n".join(pages).strip()
28
+
29
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.115.0
2
+ uvicorn[standard]>=0.30.0
3
+ pydantic>=2.8.0
4
+ pydantic-settings>=2.4.0
5
+ httpx>=0.27.0
6
+ python-dotenv>=1.0.1
7
+ tenacity>=8.3.0
8
+ PyMuPDF>=1.24.0
9
+