santanche commited on
Commit
216b413
·
1 Parent(s): b05ac23

feat (app): initial app configuration

Browse files
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.9
5
+
6
+ RUN useradd -m -u 1000 user
7
+ USER user
8
+ ENV PATH="/home/user/.local/bin:$PATH"
9
+
10
+ WORKDIR /app
11
+
12
+ COPY --chown=user ./requirements.txt requirements.txt
13
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
+
15
+ COPY --chown=user . /app
16
+ CMD ["uvicorn", "server_cancer_predictor:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -10,3 +10,7 @@ short_description: Breast Cancer Diagnostic - FastAPI
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
13
+
14
+ ## Breast Cancer Wisconsin (Diagnostic)
15
+
16
+ See the dataset at [DataSci4Health](https://github.com/datasci4health/datasci4health.github.io/tree/master/data/breast-cancer/wisconsin)
breast-cancer-wisconsin_samples.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ diagnosis,radius_mean,texture_mean,symmetry_mean,fractal_dimension_mean
2
+ B,12.47,18.6,0.1925,0.06373
3
+ M,18.94,21.31,0.1582,0.05461
4
+ M,15.46,19.48,0.1931,0.05796
5
+ B,12.4,17.68,0.1811,0.07102
cancer_predictor.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module provides the CancerPredictor class for training and predicting breast cancer diagnosis
3
+ using logistic regression. It leverages scikit-learn for model training, evaluation, and prediction,
4
+ and pandas for data manipulation. The predictor expects input features such as radius_mean,
5
+ texture_mean, symmetry_mean, and fractal_dimension_mean, and outputs a diagnosis prediction.
6
+
7
+ Classes:
8
+ CancerPredictor: Handles training on a CSV dataset and making predictions
9
+ based on input features.
10
+ """
11
+ import pandas as pd
12
+ from sklearn.linear_model import LogisticRegression
13
+ from sklearn.preprocessing import LabelEncoder
14
+ from sklearn.metrics import accuracy_score
15
+
16
class CancerPredictor:
    """
    Train a logistic-regression classifier for breast cancer diagnosis and use
    it to predict the diagnosis of individual samples.

    The label encoder is fitted on the training labels only, so the mapping
    between diagnosis strings and integer classes stays the one the model was
    trained with.
    """

    # Feature columns, in the order they are fed to the model for both
    # training and prediction.
    FEATURE_COLUMNS = [
        'radius_mean', 'texture_mean', 'symmetry_mean', 'fractal_dimension_mean']

    # Column holding the diagnosis label in the CSV files.
    TARGET_COLUMN = 'diagnosis'

    def __init__(self):
        # Classifier fitted by train() and queried by predict().
        self.model = LogisticRegression()
        # Maps diagnosis labels to integer classes and back.
        self.le_diagnosis = LabelEncoder()

    def train(self, csv_train, csv_test):
        """
        Fit the model on a training CSV and report its accuracy on a test CSV.

        Both CSV files must include the columns 'radius_mean', 'texture_mean',
        'symmetry_mean', 'fractal_dimension_mean', and 'diagnosis'.
        Prints the model accuracy measured on the test data.

        Args:
            csv_train: Path (or anything pandas.read_csv accepts) of the
                training data.
            csv_test: Path (or anything pandas.read_csv accepts) of the
                test data.

        Raises:
            ValueError: If the test data contains a diagnosis label that was
                not present in the training data.
        """
        # Load the train data
        data_train = pd.read_csv(csv_train)

        # Split features and target; the encoder is fitted here, on the
        # training labels only.
        X_train = data_train[self.FEATURE_COLUMNS]
        y_train = self.le_diagnosis.fit_transform(data_train[self.TARGET_COLUMN])

        # Train the model
        self.model.fit(X_train, y_train)

        # Load the test data
        data_test = pd.read_csv(csv_test)

        # Encode the test labels with the mapping learned from the training
        # data. Re-fitting the encoder here would replace that mapping and
        # could silently mislabel both the accuracy and later predictions.
        X_test = data_test[self.FEATURE_COLUMNS]
        y_test = self.le_diagnosis.transform(data_test[self.TARGET_COLUMN])

        # Evaluate the model
        y_pred = self.model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Model accuracy: {accuracy:.2f}")

    def predict(self, radius_mean, texture_mean, symmetry_mean, fractal_dimension_mean):
        """
        Predict the diagnosis for a single sample.

        The four arguments are the feature values of the sample. The returned
        value is the diagnosis label as it appeared in the training data
        (for the Wisconsin dataset, 'M' for malignant or 'B' for benign).
        The model must have been trained with train() first.
        """
        # Create a DataFrame with the same feature names as the training data
        input_data = pd.DataFrame(
            [[radius_mean, texture_mean, symmetry_mean, fractal_dimension_mean]],
            columns=self.FEATURE_COLUMNS)

        # Make prediction
        prediction = self.model.predict(input_data)

        # Decode the integer class back to its diagnosis label
        diagnosis = self.le_diagnosis.inverse_transform(prediction)[0]

        return diagnosis
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ fastapi
2
+ gunicorn
server_cancer_predictor.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module provides a FastAPI-based web server for breast cancer prediction and model training.
3
+ It exposes endpoints for training a cancer prediction model asynchronously,
4
+ checking training status, and making predictions based on input features.
5
+
6
+ Endpoints:
7
+ - POST /train: Starts model training in the background using the provided training and test CSV file paths.
8
+ - GET /training_status: Returns the current status of the model training process.
9
+ - GET /predict: Predicts cancer diagnosis based on input features
10
+ (radius_mean, texture_mean, symmetry_mean, fractal_dimension_mean).
11
+
12
+ Dependencies:
13
+ - FastAPI for API creation
14
+ - Pydantic for request validation
15
+ - uvicorn for running the server
16
+ - CancerPredictor class for model operations (imported from cancer_predictor.py)
17
+
18
+ Usage:
19
+ Run this module to start the API server. Use the endpoints to train the model and make predictions.
20
+ """
21
+
22
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
23
+ import uvicorn
24
+
25
+ # Import the CancerPredictor class
26
+ from cancer_predictor import CancerPredictor
27
+
28
# ASGI application object; the route decorators below register on it.
app = FastAPI()

# Single CancerPredictor shared by every request handled by this process.
predictor = CancerPredictor()

# Textual training state reported by /training_status and checked by /predict.
training_status = "Not started"
35
+
36
def train_model(train_path: str, test_path: str):
    """
    Train the shared predictor and record the outcome in ``training_status``.

    The status is set to "In progress" before training starts, then to
    "Completed" on success or to "Failed: <error text>" if training raises.
    """
    global training_status
    training_status = "In progress"
    try:
        predictor.train(train_path, test_path)
    except Exception as exc:
        # Nothing upstream observes this exception, so the failure is
        # surfaced through the status string instead.
        training_status = f"Failed: {exc!s}"
    else:
        training_status = "Completed"
44
+
45
# Schedules train_model to run after the response is sent and returns
# immediately; progress is exposed through the training status endpoint.
@app.post("/train")
async def train(train_path: str, test_path: str, background_tasks: BackgroundTasks):
    background_tasks.add_task(train_model, train_path=train_path, test_path=test_path)
    response = {"message": "Training started in the background"}
    return response
49
+
50
# Reports the current value of the module-level training_status string.
@app.get("/training_status")
async def get_training_status():
    return dict(status=training_status)
53
+
54
# Returns the predicted diagnosis for one sample. Responds with 400 until
# training has completed and with 500 if the prediction itself fails.
@app.get("/predict")
async def predict(radius_mean: float, texture_mean: float, symmetry_mean: float, fractal_dimension_mean: float):
    model_ready = training_status == "Completed"
    if not model_ready:
        raise HTTPException(status_code=400, detail="Model not trained yet")

    try:
        label = predictor.predict(
            radius_mean,
            texture_mean,
            symmetry_mean,
            fractal_dimension_mean,
        )
        return {"diagnosis": str(label)}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
64
+
65
# Start a uvicorn server on all interfaces, port 8000, when this module is
# executed directly as a script.
if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )