github-actions[bot] committed on
Commit
66e683e
·
1 Parent(s): 5ecd2f9

Sync turing folder from GitHub

Browse files
turing/api/app.py CHANGED
@@ -1,12 +1,14 @@
1
  import base64
2
  import os
 
3
 
4
- from fastapi import FastAPI, HTTPException
5
  from fastapi.responses import JSONResponse
6
  import gradio as gr
7
  from loguru import logger
8
 
9
  from turing.api.demo import create_demo
 
10
  from turing.api.schemas import PredictionRequest, PredictionResponse
11
  from turing.modeling.predict import ModelInference
12
 
@@ -53,6 +55,9 @@ app = FastAPI(
53
  version="1.0.0"
54
  )
55
 
 
 
 
56
  @app.get("/manifest.json")
57
  def get_manifest():
58
  return JSONResponse(content={
@@ -67,8 +72,10 @@ def get_manifest():
67
 
68
  # Global inference engine instance
69
  inference_engine = ModelInference()
70
-
71
  demo = create_demo(inference_engine)
 
 
 
72
  app = gr.mount_gradio_app(app, demo, path="/gradio")
73
 
74
  @app.get("/")
@@ -80,17 +87,19 @@ def health_check():
80
 
81
 
82
  @app.post("/predict", response_model=PredictionResponse)
83
- def predict(request: PredictionRequest):
 
 
84
  """
85
  Endpoint to classify a list of code comments.
86
  Dynamically loads the model from MLflow based on the request parameters.
87
  """
88
  try:
89
- logger.info(f"Received prediction request for language: {request.language}")
90
-
91
  # Perform prediction using the inference engine
92
  raw, predictions, run_id, artifact = inference_engine.predict_payload(
93
- texts=request.texts, language=request.language
94
  )
95
 
96
  # Ensure predictions are serializable (convert numpy arrays to lists)
@@ -100,7 +109,7 @@ def predict(request: PredictionRequest):
100
  return PredictionResponse(
101
  predictions=raw.tolist(),
102
  labels=predictions,
103
- model_info={"artifact": artifact, "language": request.language},
104
  )
105
 
106
  except Exception as e:
 
1
  import base64
2
  import os
3
+ from typing import Literal
4
 
5
+ from fastapi import FastAPI, HTTPException, Query
6
  from fastapi.responses import JSONResponse
7
  import gradio as gr
8
  from loguru import logger
9
 
10
  from turing.api.demo import create_demo
11
+ from turing.api.resource_monitoring import PrometheusBodyMiddleware, instrumentator
12
  from turing.api.schemas import PredictionRequest, PredictionResponse
13
  from turing.modeling.predict import ModelInference
14
 
 
55
  version="1.0.0"
56
  )
57
 
58
# Register the body-inspecting middleware so /predict payload metrics are
# captured before the endpoint consumes the request stream.
app.add_middleware(PrometheusBodyMiddleware)
60
+
61
  @app.get("/manifest.json")
62
  def get_manifest():
63
  return JSONResponse(content={
 
72
 
73
# Single shared inference engine; both the REST endpoint and the Gradio
# demo route their predictions through this one instance.
inference_engine = ModelInference()
demo = create_demo(inference_engine)

# Attach Prometheus instrumentation and expose /metrics (hidden from the
# OpenAPI schema, gzip-compressed) before mounting the Gradio UI.
instrumentator.instrument(app).expose(app, include_in_schema=False, should_gzip=True)
app = gr.mount_gradio_app(app, demo, path="/gradio")
80
 
81
  @app.get("/")
 
87
 
88
 
89
  @app.post("/predict", response_model=PredictionResponse)
90
+ async def predict(request: PredictionRequest, language: Literal["java", "python", "pharo"] = Query(
91
+ ...
92
+ )):
93
  """
94
  Endpoint to classify a list of code comments.
95
  Dynamically loads the model from MLflow based on the request parameters.
96
  """
97
  try:
98
+ logger.info(f"Received prediction request for language: {language}")
99
+
100
  # Perform prediction using the inference engine
101
  raw, predictions, run_id, artifact = inference_engine.predict_payload(
102
+ texts=request.texts, language=language
103
  )
104
 
105
  # Ensure predictions are serializable (convert numpy arrays to lists)
 
109
  return PredictionResponse(
110
  predictions=raw.tolist(),
111
  labels=predictions,
112
+ model_info={"artifact": artifact, "language": language},
113
  )
114
 
115
  except Exception as e:
turing/api/resource_monitoring.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Callable
3
+
4
+ from fastapi import Request
5
+ from prometheus_client import Counter, Gauge
6
+ from prometheus_fastapi_instrumentator import Instrumentator, metrics
7
+ from prometheus_fastapi_instrumentator.metrics import Info
8
+ from starlette.middleware.base import BaseHTTPMiddleware
9
+ from starlette.types import Message
10
+
11
# Prefix parts for every metric this module exports: the exported name is
# "<namespace>_<subsystem>_<metric>".
SUBSYSTEM = "model"
NAMESPACE = "turing_api"

# Instrumentator wiring the default FastAPI metrics into the app.
# "/metrics" itself is excluded so scrapes do not instrument themselves,
# and in-progress requests are tracked under "fastapi_inprogress".
instrumentator = Instrumentator(
    should_group_status_codes=False,
    should_ignore_untemplated=True,
    should_respect_env_var=False,
    should_instrument_requests_inprogress=True,
    excluded_handlers=["/metrics"],
    inprogress_name="fastapi_inprogress",
    inprogress_labels=True,
)
24
+
25
## Define custom metric for tracking requested languages
def http_requested_languages_total(
    metric_name: str = "http_requested_languages_total",
    metric_description: str = "Total number of HTTP requests per programming language",
    metric_namespace: str = NAMESPACE,
    metric_subsystem: str = SUBSYSTEM,
) -> Callable[[Info], None]:
    """Build an instrumentation hook that counts /predict calls per language.

    BUG FIX: the previous default ``metric_name`` ("Total HTTP requested
    languages") contained spaces; prometheus_client only accepts names
    matching ``[a-zA-Z_:][a-zA-Z0-9_:]*`` and raised ValueError as soon as
    the Counter was created, i.e. at import time. The default is now a
    valid snake_case name; the human-readable text stays in the
    description.

    Args:
        metric_name: Prometheus-valid metric name (no spaces).
        metric_description: Help text attached to the metric.
        metric_namespace: Metric name prefix (namespace component).
        metric_subsystem: Metric name prefix (subsystem component).

    Returns:
        An async instrumentation callback suitable for ``Instrumentator.add``.
    """
    METRIC = Counter(
        metric_name,
        metric_description,
        namespace=metric_namespace,
        subsystem=metric_subsystem,
        labelnames=["language"],
    )

    async def instrumentation(info: Info) -> None:
        try:
            # Only meter the prediction endpoint; other handlers are ignored.
            if info.modified_handler != "/predict":
                return
            # A missing query param returns None, which is not a valid label
            # value — fold it into "other" so labels() never receives None.
            lang = info.request.query_params.get("language") or "other"
        except Exception:
            print("Failed to get language from request")
            lang = "other"

        METRIC.labels(language=lang).inc()

    return instrumentation
50
+
51
+
52
## Custom metrics describing the code-comment payloads of /predict requests.
# BUG FIX: all three metric names previously contained spaces
# ("Total HTTP request code comments", ...). prometheus_client validates
# names against [a-zA-Z_:][a-zA-Z0-9_:]* and raised ValueError at import
# time, so the module could never be loaded. Names are now snake_case;
# the prose moved into the description (help text) argument.

# Number of comments submitted, labelled by programming language.
http_request_code_comments_total = Counter(
    "http_request_code_comments_total",
    "Total number of comments in HTTP requests",
    namespace=NAMESPACE,
    subsystem=SUBSYSTEM,
    labelnames=["language"],
)

# Cumulative character count of submitted comments, per endpoint/language.
http_request_comment_characters_total = Counter(
    "http_request_comment_characters_total",
    "Total number of characters in the HTTP requests",
    namespace=NAMESPACE,
    subsystem=SUBSYSTEM,
    labelnames=["endpoint", "language"],
)

# Gauge: length of the longest single comment seen in the latest request.
http_request_maximum_characters_per_comment = Gauge(
    "http_request_maximum_characters_per_comment",
    "Maximum number of characters in a single comment from HTTP requests",
    namespace=NAMESPACE,
    subsystem=SUBSYSTEM,
    labelnames=["language"],
)
79
+
80
+
81
## Middleware to extract and record metrics from request body
class PrometheusBodyMiddleware(BaseHTTPMiddleware):
    """Record comment-count and character metrics from /predict bodies.

    Reading the body in middleware consumes Starlette's request stream, so
    after parsing we install a replacement ``receive`` that replays the
    cached bytes for the downstream endpoint.
    """

    async def dispatch(self, request: Request, call_next):
        # Only the prediction endpoint carries a payload we meter.
        if request.url.path != "/predict":
            return await call_next(request)

        body_bytes = await request.body()
        try:
            if body_bytes:
                language = request.query_params.get("language", "unknown")
                body_json = json.loads(body_bytes)
                # FIX: removed the debug print of the full request body,
                # which leaked user payloads to stdout on every call; also
                # guard against valid JSON that is not an object (e.g. a
                # bare list), which previously escaped the except clause as
                # an unhandled AttributeError on ``.get``.
                texts = body_json.get("texts") if isinstance(body_json, dict) else None
                if texts:
                    total_characters = sum(len(text) for text in texts if text)
                    max_characters = max(
                        (len(text) for text in texts if text), default=0
                    )
                    http_request_comment_characters_total.labels(
                        endpoint="/predict", language=language
                    ).inc(total_characters)
                    http_request_maximum_characters_per_comment.labels(
                        language=language
                    ).set(max_characters)
                    http_request_code_comments_total.labels(language=language).inc(
                        len(texts)
                    )
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Malformed bodies are not metered; the endpoint itself will
            # reject them with a validation error.
            pass

        # Replay the already-consumed body for the downstream handler.
        async def receive() -> Message:
            return {"type": "http.request", "body": body_bytes}

        request._receive = receive

        response = await call_next(request)
        return response
111
+
112
## Register metrics with the instrumentator.
# ``Instrumentator.add`` returns the instrumentator itself, so sequential
# calls are equivalent to one long fluent chain; separate statements make
# each registration easier to toggle or reorder.
instrumentator.add(
    metrics.request_size(
        should_include_handler=True,
        should_include_method=False,
        should_include_status=True,
        metric_namespace=NAMESPACE,
        metric_subsystem=SUBSYSTEM,
    )
)
instrumentator.add(
    metrics.response_size(
        should_include_handler=True,
        should_include_method=False,
        should_include_status=True,
        metric_namespace=NAMESPACE,
        metric_subsystem=SUBSYSTEM,
    )
)
# Custom per-language request counter defined above.
instrumentator.add(http_requested_languages_total())
instrumentator.add(
    metrics.requests(
        should_include_handler=True,
        should_include_method=True,
        should_include_status=True,
        metric_namespace=NAMESPACE,
        metric_subsystem=SUBSYSTEM,
    )
)
instrumentator.add(
    metrics.latency(
        should_include_handler=True,
        should_include_method=False,
        should_include_status=True,
        metric_namespace=NAMESPACE,
        metric_subsystem=SUBSYSTEM,
    )
)
148
+
turing/api/schemas.py CHANGED
@@ -10,9 +10,6 @@ class PredictionRequest(BaseModel):
10
  description="List of code comments to classify",
11
  example=["public void main", "def init self"],
12
  )
13
- language: str = Field(
14
- ..., description="Programming language (java, python, pharo)", example="java"
15
- )
16
 
17
 
18
  # Output Schema
 
10
  description="List of code comments to classify",
11
  example=["public void main", "def init self"],
12
  )
 
 
 
13
 
14
 
15
  # Output Schema