Abid Ali Awan committed on
Commit
8c1d338
·
1 Parent(s): 98a65c8

feat: Add data analysis tool, implement task-specific model training with enhanced metrics and deployment examples, and refactor UI for manual testing.

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +111 -28
  3. modal_backend.py +38 -14
  4. requirements.txt +2 -1
.gitignore CHANGED
@@ -3,3 +3,4 @@ __pycache__/
3
  *.pyc
4
  .ipynb_checkpoints/
5
  local/models/
 
 
3
  *.pyc
4
  .ipynb_checkpoints/
5
  local/models/
6
+ .venv
app.py CHANGED
@@ -3,36 +3,58 @@ import modal
3
  import json
4
 
5
  # Initialize Modal function references
6
- f_analyze = modal.Function.lookup("mlops-backend", "analyze_data")
7
- f_train = modal.Function.lookup("mlops-backend", "train_model")
8
- f_check = modal.Function.lookup("mlops-backend", "check_model")
9
 
10
- def train_model_tool(file_path: str, target_column: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  """
12
  Trains a model on the uploaded CSV file.
13
 
14
  Args:
15
  file_path: The path to the uploaded CSV file.
16
  target_column: The name of the column to predict.
 
 
 
17
  """
18
  with open(file_path, "r") as f:
19
  content = f.read()
20
 
21
  # Call Modal backend
22
- result = f_train.remote(content, target_column)
23
 
24
  return json.dumps({
25
- "message": "Training complete.",
26
  "model_id": result['model_id'],
27
- "accuracy": result['accuracy']
28
  }, indent=2)
29
 
30
- def deploy_model_tool(model_id: str):
31
  """
32
  Deploys a trained model and returns the API usage code.
33
 
34
  Args:
35
  model_id: The ID of the model to deploy.
 
 
36
  """
37
  # Verify model exists
38
  check = f_check.remote(model_id)
@@ -40,7 +62,7 @@ def deploy_model_tool(model_id: str):
40
  return f"Error: Model {model_id} not found."
41
 
42
  # Construct the API usage example
43
- api_url = "https://[YOUR_MODAL_USERNAME]--mlops-backend-predict-api.modal.run"
44
 
45
  usage_code = f"""
46
  import requests
@@ -48,21 +70,33 @@ import requests
48
  url = "{api_url}"
49
  payload = {{
50
  "model_id": "{model_id}",
51
- "data": {{ ... your input data ... }}
52
  }}
53
  response = requests.post(url, json=payload)
54
  print(response.json())
55
  """
56
 
57
- return f"Model {model_id} is live!\n\nEndpoint: {api_url}\n\nUsage:\n```python\n{usage_code}\n```"
 
 
 
 
 
 
 
 
 
58
 
59
- def auto_deploy_tool(file_path: str, target_column: str):
60
  """
61
  Full Pipeline: Analyzes data, trains model, evaluates, and deploys it in one go.
62
 
63
  Args:
64
  file_path: The path to the uploaded CSV file.
65
  target_column: The name of the column to predict.
 
 
 
66
  """
67
  with open(file_path, "r") as f:
68
  content = f.read()
@@ -71,12 +105,13 @@ def auto_deploy_tool(file_path: str, target_column: str):
71
  analysis = f_analyze.remote(content)
72
 
73
  # 2. Train & Evaluate
74
- train_result = f_train.remote(content, target_column)
75
  model_id = train_result['model_id']
76
- accuracy = train_result['accuracy']
 
77
 
78
  # 3. Deploy (Construct Info)
79
- api_url = "https://[YOUR_MODAL_USERNAME]--mlops-backend-predict-api.modal.run"
80
 
81
  usage_code = f"""
82
  import requests
@@ -89,6 +124,15 @@ payload = {{
89
  response = requests.post(url, json=payload)
90
  print(response.json())
91
  """
 
 
 
 
 
 
 
 
 
92
 
93
  report = f"""# Auto-Deployment Report
94
 
@@ -97,31 +141,70 @@ print(response.json())
97
  - **Columns**: {', '.join(analysis['columns'])}
98
 
99
  ## 2. Model Training
100
- - **Status**: Success
101
  - **Model ID**: `{model_id}`
102
- - **Accuracy**: {accuracy:.2%}
103
 
104
  ## 3. Deployment
105
  The model is live at: `{api_url}`
106
 
107
- ### Usage Code
108
  ```python
109
  {usage_code}
110
  ```
 
 
 
 
 
111
  """
112
  return report
113
 
114
- def dummy_chat(message, history):
115
- return "I am an MCP Server. Please connect to me via an MCP client to use my tools."
 
 
 
 
 
 
 
 
 
 
 
116
 
117
- # Expose tools via ChatInterface
118
- # When mcp_server=True, the tools listed here are exposed to the client
119
- demo = gr.ChatInterface(
120
- fn=dummy_chat,
121
- tools=[train_model_tool, deploy_model_tool, auto_deploy_tool],
122
- title="Auto-Deployer MCP Server",
123
- description="MCP Server with Training, Deployment, and Auto-Deploy tools."
124
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  if __name__ == "__main__":
127
  demo.launch(mcp_server=True)
 
 
 
3
  import json
4
 
5
  # Initialize Modal function references
6
+ f_analyze = modal.Function.from_name("mlops-backend", "analyze_data")
7
+ f_train = modal.Function.from_name("mlops-backend", "train_model")
8
+ f_check = modal.Function.from_name("mlops-backend", "check_model")
9
 
10
+ def analyze_data_tool(file_path: str) -> str:
11
+ """
12
+ Analyzes the uploaded CSV file and returns statistical metadata.
13
+
14
+ Args:
15
+ file_path: The path to the uploaded CSV file.
16
+ Returns:
17
+ str: JSON string with analysis results.
18
+ """
19
+ with open(file_path, "r") as f:
20
+ content = f.read()
21
+
22
+ # Call Modal backend
23
+ result = f_analyze.remote(content)
24
+
25
+ return json.dumps(result, indent=2)
26
+
27
+ def train_model_tool(file_path: str, target_column: str, task_type: str = "classification") -> str:
28
  """
29
  Trains a model on the uploaded CSV file.
30
 
31
  Args:
32
  file_path: The path to the uploaded CSV file.
33
  target_column: The name of the column to predict.
34
+ task_type: The type of task: "classification", "regression", or "time_series".
35
+ Returns:
36
+ str: JSON string with training results.
37
  """
38
  with open(file_path, "r") as f:
39
  content = f.read()
40
 
41
  # Call Modal backend
42
+ result = f_train.remote(content, target_column, task_type)
43
 
44
  return json.dumps({
45
+ "message": result['message'],
46
  "model_id": result['model_id'],
47
+ "metric": f"{result['metric_name']}: {result['metric_value']:.4f}"
48
  }, indent=2)
49
 
50
+ def deploy_model_tool(model_id: str) -> str:
51
  """
52
  Deploys a trained model and returns the API usage code.
53
 
54
  Args:
55
  model_id: The ID of the model to deploy.
56
+ Returns:
57
+ str: Deployment status and usage code.
58
  """
59
  # Verify model exists
60
  check = f_check.remote(model_id)
 
62
  return f"Error: Model {model_id} not found."
63
 
64
  # Construct the API usage example
65
+ api_url = "https://abidali899--mlops-backend-predict-api.modal.run"
66
 
67
  usage_code = f"""
68
  import requests
 
70
  url = "{api_url}"
71
  payload = {{
72
  "model_id": "{model_id}",
73
+ "data": {{ "col1": "val1", "col2": "val2" }}
74
  }}
75
  response = requests.post(url, json=payload)
76
  print(response.json())
77
  """
78
 
79
+ curl_code = f"""
80
+ curl -X POST {api_url} \\
81
+ -H "Content-Type: application/json" \\
82
+ -d '{{
83
+ "model_id": "{model_id}",
84
+ "data": {{ "col1": "val1", "col2": "val2" }}
85
+ }}'
86
+ """
87
+
88
+ return f"Model {model_id} is live!\n\nEndpoint: {api_url}\n\n### Usage (Python):\n```python\n{usage_code}\n```\n\n### Usage (cURL):\n```bash\n{curl_code}\n```"
89
 
90
+ def auto_deploy_tool(file_path: str, target_column: str, task_type: str = "classification") -> str:
91
  """
92
  Full Pipeline: Analyzes data, trains model, evaluates, and deploys it in one go.
93
 
94
  Args:
95
  file_path: The path to the uploaded CSV file.
96
  target_column: The name of the column to predict.
97
+ task_type: The type of task: "classification", "regression", or "time_series".
98
+ Returns:
99
+ str: A detailed report of the pipeline execution.
100
  """
101
  with open(file_path, "r") as f:
102
  content = f.read()
 
105
  analysis = f_analyze.remote(content)
106
 
107
  # 2. Train & Evaluate
108
+ train_result = f_train.remote(content, target_column, task_type)
109
  model_id = train_result['model_id']
110
+ metric_val = train_result['metric_value']
111
+ metric_name = train_result['metric_name']
112
 
113
  # 3. Deploy (Construct Info)
114
+ api_url = "https://abidali899--mlops-backend-predict-api.modal.run"
115
 
116
  usage_code = f"""
117
  import requests
 
124
  response = requests.post(url, json=payload)
125
  print(response.json())
126
  """
127
+
128
+ curl_code = f"""
129
+ curl -X POST {api_url} \\
130
+ -H "Content-Type: application/json" \\
131
+ -d '{{
132
+ "model_id": "{model_id}",
133
+ "data": {{ "col1": "val1", "col2": "val2" }}
134
+ }}'
135
+ """
136
 
137
  report = f"""# Auto-Deployment Report
138
 
 
141
  - **Columns**: {', '.join(analysis['columns'])}
142
 
143
  ## 2. Model Training
144
+ - **Task**: {task_type}
145
  - **Model ID**: `{model_id}`
146
+ - **{metric_name.capitalize()}**: {metric_val:.4f}
147
 
148
  ## 3. Deployment
149
  The model is live at: `{api_url}`
150
 
151
+ ### Usage Code (Python)
152
  ```python
153
  {usage_code}
154
  ```
155
+
156
+ ### Usage Code (cURL)
157
+ ```bash
158
+ {curl_code}
159
+ ```
160
  """
161
  return report
162
 
163
+ with gr.Blocks() as demo:
164
+ gr.Markdown("# Auto-Deployer MCP Server")
165
+ gr.Markdown("This server exposes the following tools to MCP clients:")
166
+ gr.Markdown("- `analyze_data_tool`")
167
+ gr.Markdown("- `train_model_tool`")
168
+ gr.Markdown("- `deploy_model_tool`")
169
+ gr.Markdown("- `auto_deploy_tool`")
170
+
171
+ # Register tools using gr.api
172
+ gr.api(analyze_data_tool)
173
+ gr.api(train_model_tool)
174
+ gr.api(deploy_model_tool)
175
+ gr.api(auto_deploy_tool)
176
 
177
+ gr.Markdown("## Manual Testing Interface")
178
+
179
+ with gr.Tab("Analyze"):
180
+ an_file = gr.File(label="CSV File")
181
+ an_btn = gr.Button("Analyze Data")
182
+ an_out = gr.JSON(label="Output")
183
+ an_btn.click(analyze_data_tool, [an_file], an_out)
184
+
185
+ with gr.Tab("Train"):
186
+ t_file = gr.File(label="CSV File")
187
+ t_col = gr.Textbox(label="Target Column")
188
+ t_type = gr.Dropdown(["classification", "regression", "time_series"], label="Task Type", value="classification")
189
+ t_btn = gr.Button("Train")
190
+ t_out = gr.JSON(label="Output")
191
+ t_btn.click(train_model_tool, [t_file, t_col, t_type], t_out)
192
+
193
+ with gr.Tab("Deploy"):
194
+ d_id = gr.Textbox(label="Model ID")
195
+ d_btn = gr.Button("Deploy")
196
+ d_out = gr.Markdown(label="Output")
197
+ d_btn.click(deploy_model_tool, [d_id], d_out)
198
+
199
+ with gr.Tab("Auto Deploy"):
200
+ a_file = gr.File(label="CSV File")
201
+ a_col = gr.Textbox(label="Target Column")
202
+ a_type = gr.Dropdown(["classification", "regression", "time_series"], label="Task Type", value="classification")
203
+ a_btn = gr.Button("Auto Deploy")
204
+ a_out = gr.Markdown(label="Output")
205
+ a_btn.click(auto_deploy_tool, [a_file, a_col, a_type], a_out)
206
 
207
  if __name__ == "__main__":
208
  demo.launch(mcp_server=True)
209
+
210
+
modal_backend.py CHANGED
@@ -1,7 +1,7 @@
1
  import modal
2
  import pandas as pd
3
  import io
4
- from sklearn.ensemble import RandomForestClassifier
5
  from sklearn.model_selection import train_test_split
6
  from sklearn.metrics import accuracy_score
7
  import joblib
@@ -10,7 +10,7 @@ import json
10
  app = modal.App("mlops-backend")
11
  volume = modal.Volume.from_name("model-registry", create_if_missing=True)
12
 
13
- image = modal.Image.debian_slim().pip_install("pandas", "scikit-learn", "joblib")
14
 
15
  @app.function(image=image)
16
  def analyze_data(csv_content: str):
@@ -27,9 +27,10 @@ def analyze_data(csv_content: str):
27
  }
28
 
29
  @app.function(image=image, volumes={"/models": volume})
30
- def train_model(csv_content: str, target_col: str):
31
  """
32
- Trains a Random Forest model on the CSV content.
 
33
  """
34
  df = pd.read_csv(io.StringIO(csv_content))
35
 
@@ -39,35 +40,58 @@ def train_model(csv_content: str, target_col: str):
39
  X = df.drop(columns=[target_col])
40
  y = df[target_col]
41
 
42
- # Simple handling for non-numeric data for demo purposes
43
  X = pd.get_dummies(X)
44
 
45
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
 
 
 
 
 
 
 
 
 
46
 
47
- model = RandomForestClassifier()
48
  model.fit(X_train, y_train)
49
 
50
  y_pred = model.predict(X_test)
51
- accuracy = accuracy_score(y_test, y_pred)
 
 
 
 
 
 
 
 
 
52
 
53
  model_id = f"model_{int(pd.Timestamp.now().timestamp())}"
54
  model_path = f"/models/{model_id}.joblib"
55
  joblib.dump(model, model_path)
56
- volume.commit() # Ensure data is persisted
57
 
58
- # Save metadata for the API to know expected columns
59
  meta_path = f"/models/{model_id}_meta.json"
60
  with open(meta_path, "w") as f:
61
- json.dump({"columns": list(X.columns)}, f)
 
 
 
62
  volume.commit()
63
 
64
  return {
65
  "model_id": model_id,
66
- "accuracy": accuracy,
67
- "message": "Model trained successfully."
 
68
  }
69
 
70
- @app.web_endpoint(method="POST")
 
71
  def predict_api(item: dict):
72
  """
73
  Prediction API endpoint.
 
1
  import modal
2
  import pandas as pd
3
  import io
4
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
5
  from sklearn.model_selection import train_test_split
6
  from sklearn.metrics import accuracy_score
7
  import joblib
 
10
  app = modal.App("mlops-backend")
11
  volume = modal.Volume.from_name("model-registry", create_if_missing=True)
12
 
13
+ image = modal.Image.debian_slim().pip_install("pandas", "scikit-learn", "joblib", "fastapi[standard]")
14
 
15
  @app.function(image=image)
16
  def analyze_data(csv_content: str):
 
27
  }
28
 
29
  @app.function(image=image, volumes={"/models": volume})
30
+ def train_model(csv_content: str, target_col: str, task_type: str = "classification"):
31
  """
32
+ Trains a model based on the task type.
33
+ task_type: "classification", "regression", or "time_series"
34
  """
35
  df = pd.read_csv(io.StringIO(csv_content))
36
 
 
40
  X = df.drop(columns=[target_col])
41
  y = df[target_col]
42
 
43
+ # Simple handling for non-numeric data
44
  X = pd.get_dummies(X)
45
 
46
+ # Configure split and model based on task
47
+ if task_type == "time_series":
48
+ # Time series requires non-shuffled split
49
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
50
+ model = RandomForestRegressor()
51
+ elif task_type == "regression":
52
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
53
+ model = RandomForestRegressor()
54
+ else: # classification
55
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
56
+ model = RandomForestClassifier()
57
 
 
58
  model.fit(X_train, y_train)
59
 
60
  y_pred = model.predict(X_test)
61
+
62
+ # Calculate metrics
63
+ if task_type == "classification":
64
+ from sklearn.metrics import accuracy_score
65
+ metric_name = "accuracy"
66
+ metric_val = accuracy_score(y_test, y_pred)
67
+ else:
68
+ from sklearn.metrics import mean_squared_error
69
+ metric_name = "mse"
70
+ metric_val = mean_squared_error(y_test, y_pred)
71
 
72
  model_id = f"model_{int(pd.Timestamp.now().timestamp())}"
73
  model_path = f"/models/{model_id}.joblib"
74
  joblib.dump(model, model_path)
75
+ volume.commit()
76
 
77
+ # Save metadata
78
  meta_path = f"/models/{model_id}_meta.json"
79
  with open(meta_path, "w") as f:
80
+ json.dump({
81
+ "columns": list(X.columns),
82
+ "task_type": task_type
83
+ }, f)
84
  volume.commit()
85
 
86
  return {
87
  "model_id": model_id,
88
+ "metric_name": metric_name,
89
+ "metric_value": metric_val,
90
+ "message": f"{task_type.capitalize()} model trained successfully."
91
  }
92
 
93
+ @app.function(image=image, volumes={"/models": volume})
94
+ @modal.fastapi_endpoint(method="POST")
95
  def predict_api(item: dict):
96
  """
97
  Prediction API endpoint.
requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  modal==1.2.2
2
- gradio==5.49.1
3
  pandas==2.3.3
4
  scikit-learn==1.7.2
5
  joblib==1.5.2
 
 
1
  modal==1.2.2
2
+ gradio[mcp]==5.49.1
3
  pandas==2.3.3
4
  scikit-learn==1.7.2
5
  joblib==1.5.2
6
+ requests==2.32.5