Spaces:
Running
Running
github-actions[bot]
committed on
Commit
·
66e683e
1
Parent(s):
5ecd2f9
Sync turing folder from GitHub
Browse files
- turing/api/app.py +16 -7
- turing/api/resource_monitoring.py +148 -0
- turing/api/schemas.py +0 -3
turing/api/app.py
CHANGED
|
@@ -1,12 +1,14 @@
|
|
| 1 |
import base64
|
| 2 |
import os
|
|
|
|
| 3 |
|
| 4 |
-
from fastapi import FastAPI, HTTPException
|
| 5 |
from fastapi.responses import JSONResponse
|
| 6 |
import gradio as gr
|
| 7 |
from loguru import logger
|
| 8 |
|
| 9 |
from turing.api.demo import create_demo
|
|
|
|
| 10 |
from turing.api.schemas import PredictionRequest, PredictionResponse
|
| 11 |
from turing.modeling.predict import ModelInference
|
| 12 |
|
|
@@ -53,6 +55,9 @@ app = FastAPI(
|
|
| 53 |
version="1.0.0"
|
| 54 |
)
|
| 55 |
|
|
|
|
|
|
|
|
|
|
| 56 |
@app.get("/manifest.json")
|
| 57 |
def get_manifest():
|
| 58 |
return JSONResponse(content={
|
|
@@ -67,8 +72,10 @@ def get_manifest():
|
|
| 67 |
|
| 68 |
# Global inference engine instance
|
| 69 |
inference_engine = ModelInference()
|
| 70 |
-
|
| 71 |
demo = create_demo(inference_engine)
|
|
|
|
|
|
|
|
|
|
| 72 |
app = gr.mount_gradio_app(app, demo, path="/gradio")
|
| 73 |
|
| 74 |
@app.get("/")
|
|
@@ -80,17 +87,19 @@ def health_check():
|
|
| 80 |
|
| 81 |
|
| 82 |
@app.post("/predict", response_model=PredictionResponse)
|
| 83 |
-
def predict(request: PredictionRequest
|
|
|
|
|
|
|
| 84 |
"""
|
| 85 |
Endpoint to classify a list of code comments.
|
| 86 |
Dynamically loads the model from MLflow based on the request parameters.
|
| 87 |
"""
|
| 88 |
try:
|
| 89 |
-
logger.info(f"Received prediction request for language: {
|
| 90 |
-
|
| 91 |
# Perform prediction using the inference engine
|
| 92 |
raw, predictions, run_id, artifact = inference_engine.predict_payload(
|
| 93 |
-
texts=request.texts, language=
|
| 94 |
)
|
| 95 |
|
| 96 |
# Ensure predictions are serializable (convert numpy arrays to lists)
|
|
@@ -100,7 +109,7 @@ def predict(request: PredictionRequest):
|
|
| 100 |
return PredictionResponse(
|
| 101 |
predictions=raw.tolist(),
|
| 102 |
labels=predictions,
|
| 103 |
-
model_info={"artifact": artifact, "language":
|
| 104 |
)
|
| 105 |
|
| 106 |
except Exception as e:
|
|
|
|
| 1 |
import base64
|
| 2 |
import os
|
| 3 |
+
from typing import Literal
|
| 4 |
|
| 5 |
+
from fastapi import FastAPI, HTTPException, Query
|
| 6 |
from fastapi.responses import JSONResponse
|
| 7 |
import gradio as gr
|
| 8 |
from loguru import logger
|
| 9 |
|
| 10 |
from turing.api.demo import create_demo
|
| 11 |
+
from turing.api.resource_monitoring import PrometheusBodyMiddleware, instrumentator
|
| 12 |
from turing.api.schemas import PredictionRequest, PredictionResponse
|
| 13 |
from turing.modeling.predict import ModelInference
|
| 14 |
|
|
|
|
| 55 |
version="1.0.0"
|
| 56 |
)
|
| 57 |
|
| 58 |
+
## Add Prometheus middleware
|
| 59 |
+
app.add_middleware(PrometheusBodyMiddleware)
|
| 60 |
+
|
| 61 |
@app.get("/manifest.json")
|
| 62 |
def get_manifest():
|
| 63 |
return JSONResponse(content={
|
|
|
|
| 72 |
|
| 73 |
# Global inference engine instance
|
| 74 |
inference_engine = ModelInference()
|
|
|
|
| 75 |
demo = create_demo(inference_engine)
|
| 76 |
+
|
| 77 |
+
# Instrument the app with Prometheus metrics
|
| 78 |
+
instrumentator.instrument(app).expose(app,include_in_schema=False, should_gzip=True)
|
| 79 |
app = gr.mount_gradio_app(app, demo, path="/gradio")
|
| 80 |
|
| 81 |
@app.get("/")
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
@app.post("/predict", response_model=PredictionResponse)
|
| 90 |
+
async def predict(request: PredictionRequest, language: Literal["java", "python", "pharo"] = Query(
|
| 91 |
+
...
|
| 92 |
+
)):
|
| 93 |
"""
|
| 94 |
Endpoint to classify a list of code comments.
|
| 95 |
Dynamically loads the model from MLflow based on the request parameters.
|
| 96 |
"""
|
| 97 |
try:
|
| 98 |
+
logger.info(f"Received prediction request for language: {language}")
|
| 99 |
+
|
| 100 |
# Perform prediction using the inference engine
|
| 101 |
raw, predictions, run_id, artifact = inference_engine.predict_payload(
|
| 102 |
+
texts=request.texts, language=language
|
| 103 |
)
|
| 104 |
|
| 105 |
# Ensure predictions are serializable (convert numpy arrays to lists)
|
|
|
|
| 109 |
return PredictionResponse(
|
| 110 |
predictions=raw.tolist(),
|
| 111 |
labels=predictions,
|
| 112 |
+
model_info={"artifact": artifact, "language": language},
|
| 113 |
)
|
| 114 |
|
| 115 |
except Exception as e:
|
turing/api/resource_monitoring.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from typing import Callable
|
| 3 |
+
|
| 4 |
+
from fastapi import Request
|
| 5 |
+
from prometheus_client import Counter, Gauge
|
| 6 |
+
from prometheus_fastapi_instrumentator import Instrumentator, metrics
|
| 7 |
+
from prometheus_fastapi_instrumentator.metrics import Info
|
| 8 |
+
from starlette.middleware.base import BaseHTTPMiddleware
|
| 9 |
+
from starlette.types import Message
|
| 10 |
+
|
| 11 |
+
SUBSYSTEM = "model"
|
| 12 |
+
NAMESPACE = "turing_api"
|
| 13 |
+
|
| 14 |
+
# Define Prometheus metrics
|
| 15 |
+
instrumentator = Instrumentator(
|
| 16 |
+
should_group_status_codes=False,
|
| 17 |
+
should_ignore_untemplated=True,
|
| 18 |
+
should_respect_env_var=False,
|
| 19 |
+
should_instrument_requests_inprogress=True,
|
| 20 |
+
excluded_handlers=["/metrics"],
|
| 21 |
+
inprogress_name="fastapi_inprogress",
|
| 22 |
+
inprogress_labels=True
|
| 23 |
+
)
|
| 24 |
+
|
## Define custom metric for tracking requested languages
def http_requested_languages_total(
    metric_name: str = "http_requested_languages",
    metric_description: str = "Total number of HTTP requests per programming language",
    metric_namespace: str = NAMESPACE,
    metric_subsystem: str = SUBSYSTEM,
) -> Callable[[Info], None]:
    """Build an instrumentation hook that counts /predict calls per language.

    Bug fix: the previous default metric_name ("Total HTTP requested
    languages") contains spaces, and prometheus_client validates metric names
    against [a-zA-Z_:][a-zA-Z0-9_:]* — constructing the Counter raised
    ValueError at import time. A Counter's exposed name also gets a "_total"
    suffix automatically, so the base name carries no suffix here.
    """
    METRIC = Counter(
        metric_name,
        metric_description,
        namespace=metric_namespace,
        subsystem=metric_subsystem,
        labelnames=["language"],
    )

    async def instrumentation(info: Info) -> None:
        # Only the prediction endpoint carries a language query parameter.
        try:
            if info.modified_handler != "/predict":
                return
            lang = info.request.query_params.get("language")
        except Exception:
            # Best-effort: bucket under "other" rather than fail the request.
            print("Failed to get language from request")
            lang = "other"

        METRIC.labels(language=lang).inc()

    return instrumentation
| 50 |
+
|
| 51 |
+
|
## Custom counter: number of code comments per request, by language.
# Bug fix: prometheus_client requires metric names matching
# [a-zA-Z_:][a-zA-Z0-9_:]* — the previous human-readable names containing
# spaces raised ValueError when the Counter/Gauge was constructed, i.e. on
# module import. Counters get a "_total" suffix appended automatically.
http_request_code_comments_total = Counter(
    "http_request_code_comments",
    "Total number of comments in HTTP requests",
    namespace=NAMESPACE,
    subsystem=SUBSYSTEM,
    labelnames=["language"],
)

## Custom counter: total characters across all comments in requests.
http_request_comment_characters_total = Counter(
    "http_request_comment_characters",
    "Total number of characters in the HTTP requests",
    namespace=NAMESPACE,
    subsystem=SUBSYSTEM,
    labelnames=["endpoint", "language"],
)


## Custom gauge: size (in characters) of the largest single comment seen.
http_request_maximum_characters_per_comment = Gauge(
    "http_request_maximum_characters_per_comment",
    "Maximum number of characters in a single comment from HTTP requests",
    namespace=NAMESPACE,
    subsystem=SUBSYSTEM,
    labelnames=["language"],
)
| 79 |
+
|
| 80 |
+
|
## Middleware to extract and record metrics from the /predict request body.
class PrometheusBodyMiddleware(BaseHTTPMiddleware):
    """Record comment-count/size metrics from the JSON body of /predict.

    Starlette consumes the request body stream once read, so after inspecting
    it the middleware installs a receive() that replays the cached bytes for
    the downstream endpoint. Metric recording is best-effort: malformed
    payloads are ignored here and left to the endpoint's own validation.
    """

    async def dispatch(self, request: Request, call_next):
        # Only the prediction endpoint carries a comment payload.
        if request.url.path != "/predict":
            return await call_next(request)

        body_bytes = await request.body()
        query_params = request.query_params
        try:
            if body_bytes:
                language = query_params.get("language", "unknown")
                # Fix: dropped the debug print of the full request body — it
                # leaked user payloads to stdout on every call.
                body_json = json.loads(body_bytes)
                texts = body_json.get("texts")
                if texts:
                    total_characters = sum(len(example) for example in texts if example)
                    max_characters = max((len(example) for example in texts if example), default=0)
                    http_request_comment_characters_total.labels(endpoint="/predict", language=language).inc(total_characters)
                    http_request_maximum_characters_per_comment.labels(language=language).set(max_characters)
                    http_request_code_comments_total.labels(language=language).inc(len(texts))
        # AttributeError added: a valid-JSON non-object body (e.g. a list) has
        # no .get and previously crashed the middleware with a 500 before the
        # endpoint could return its own validation error.
        except (json.JSONDecodeError, UnicodeDecodeError, AttributeError):
            pass

        async def receive() -> Message:
            # Replay the already-consumed body for downstream consumers.
            return {"type": "http.request", "body": body_bytes}

        request._receive = receive

        response = await call_next(request)
        return response
| 111 |
+
|
## Register built-in and custom metrics with the instrumentator.
# Registration order matches the original .add(...) chain: request size,
# response size, per-language counter, request counter, latency histogram.
for _metric in (
    metrics.request_size(
        should_include_handler=True,
        should_include_method=False,
        should_include_status=True,
        metric_namespace=NAMESPACE,
        metric_subsystem=SUBSYSTEM,
    ),
    metrics.response_size(
        should_include_handler=True,
        should_include_method=False,
        should_include_status=True,
        metric_namespace=NAMESPACE,
        metric_subsystem=SUBSYSTEM,
    ),
    http_requested_languages_total(),
    metrics.requests(
        should_include_handler=True,
        should_include_method=True,
        should_include_status=True,
        metric_namespace=NAMESPACE,
        metric_subsystem=SUBSYSTEM,
    ),
    metrics.latency(
        should_include_handler=True,
        should_include_method=False,
        should_include_status=True,
        metric_namespace=NAMESPACE,
        metric_subsystem=SUBSYSTEM,
    ),
):
    instrumentator.add(_metric)
| 148 |
+
|
turing/api/schemas.py
CHANGED
|
@@ -10,9 +10,6 @@ class PredictionRequest(BaseModel):
|
|
| 10 |
description="List of code comments to classify",
|
| 11 |
example=["public void main", "def init self"],
|
| 12 |
)
|
| 13 |
-
language: str = Field(
|
| 14 |
-
..., description="Programming language (java, python, pharo)", example="java"
|
| 15 |
-
)
|
| 16 |
|
| 17 |
|
| 18 |
# Output Schema
|
|
|
|
| 10 |
description="List of code comments to classify",
|
| 11 |
example=["public void main", "def init self"],
|
| 12 |
)
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
# Output Schema
|