File size: 5,659 Bytes
a0d2bf2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

import os
import pandas as pd
import joblib
from huggingface_hub import HfApi, login, upload_file
from datasets import load_dataset
import logging

# Root logger setup for the script: INFO and above, each record rendered as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def create_dockerfile():
    """Write the Dockerfile for the Docker Space into the working directory.

    The generated image is based on ``python:3.12-slim``, installs
    ``requirements.txt``, copies ``app.py`` together with ``model.joblib``,
    ``columns.joblib`` and ``input_data.csv`` into ``/app``, exposes port 7860
    and starts the app with ``waitress-serve``.
    """
    # ``app:app`` names the Flask instance itself, so it is handed to
    # waitress-serve directly. The ``--call`` flag is meant for factories: it
    # makes waitress invoke the object with no arguments to obtain the WSGI
    # app, which fails for a Flask instance and stops the container at startup.
    with open("Dockerfile", "w", encoding="utf-8") as f:
        f.write('''
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY app.py .
COPY model.joblib .
COPY columns.joblib .
COPY input_data.csv .
EXPOSE 7860
CMD ["waitress-serve", "--host=0.0.0.0", "--port=7860", "--threads=4", "app:app"]
''')
    logging.info("Dockerfile created")

def create_requirements():
    """Write the pinned dependency list to ``requirements.txt`` in the working directory."""
    pinned = (
        "numpy==1.26.4",
        "pandas==2.2.2",
        "scikit-learn==1.6.1",
        "joblib==1.4.2",
        "dill==0.3.8",
        "flask==3.0.3",
        "waitress==3.0.0",
    )
    # The empty first and last entries yield the leading blank line and the
    # trailing newline of the file.
    payload = "\n".join(("", *pinned, ""))
    with open("requirements.txt", "w") as out:
        out.write(payload)
    logging.info("requirements.txt created")

def create_app():
    """Write the Flask inference service to ``app.py`` in the working directory.

    The generated app loads ``model.joblib`` and ``columns.joblib`` at import
    time and exposes two routes:

    * ``GET /health`` returns ``{"status": "healthy"}``.
    * ``POST /predict`` accepts a JSON object or a list of objects, fills any
      missing required column with a default, one-hot encodes the categorical
      columns, aligns the frame to the columns stored in ``columns.joblib``
      and returns the model predictions. Any failure is logged and reported
      as a 400 response carrying the error text.
    """
    with open("app.py", "w", encoding="utf-8") as f:
        f.write('''
from flask import Flask, request, jsonify
import pandas as pd
import joblib
import logging
import os
import json

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = Flask(__name__)
base_dir = '/app' if os.path.exists('/app') else os.getcwd()
model = joblib.load(os.path.join(base_dir, "model.joblib"))
columns = joblib.load(os.path.join(base_dir, "columns.joblib"))

required_columns = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                   'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                   'NumberOfChildrenVisiting', 'MonthlyIncome', 'TypeofContact',
                   'Occupation', 'Gender', 'ProductPitched', 'MaritalStatus',
                   'Designation', 'CityTier']

@app.route('/health', methods=['GET'])
def health():
    return jsonify({'status': 'healthy'})

@app.route('/predict', methods=['POST'])
def predict():
    try:
        data = request.get_json(force=True)
        input_data = [data] if isinstance(data, dict) else data
        input_df = pd.DataFrame(input_data)
        num_cols = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                    'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                    'NumberOfChildrenVisiting', 'MonthlyIncome']
        cat_cols = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
                    'MaritalStatus', 'Designation', 'CityTier']
        for col in required_columns:
            if col not in input_df.columns:
                input_df[col] = 0.0 if col in num_cols else 'Unknown'
        input_df[num_cols] = input_df[num_cols].astype(float).fillna(input_df[num_cols].median())
        input_df[cat_cols] = input_df[cat_cols].fillna('Unknown')
        # Encode every level present in the request. Dropping the first level
        # here would erase the only category of a single-row request; the
        # reindex below keeps exactly the columns stored in columns.joblib.
        input_encoded = pd.get_dummies(input_df, columns=cat_cols, drop_first=False)
        for col in columns:
            if col not in input_encoded.columns:
                input_encoded[col] = 0
        input_encoded = input_encoded.reindex(columns=columns, fill_value=0)
        prediction = model.predict(input_encoded)
        return jsonify({'prediction': prediction.tolist()})
    except Exception as e:
        logger.error(f"Prediction failed: {str(e)}")
        return jsonify({'error': str(e)}), 400

if __name__ == "__main__":
    from waitress import serve
    serve(app, host='0.0.0.0', port=7860, threads=4)
''')
    logging.info("app.py created")

def prepare_sample_data():
    """Write two randomly sampled test rows to ``input_data.csv``.

    The rows come from the ``test`` split of
    ``Shramik121/tourism-split-dataset``. The ``ProdTaken`` column is removed
    when present, any expected feature column that is absent is added with a
    default (``0.0`` for numeric features, ``'Unknown'`` otherwise), and the
    columns are written in a fixed order without the index.
    """
    numeric_features = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                        'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                        'NumberOfChildrenVisiting', 'MonthlyIncome']
    categorical_features = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
                            'MaritalStatus', 'Designation', 'CityTier']
    # Output column order: all numeric features followed by all categorical ones.
    feature_order = numeric_features + categorical_features

    splits = load_dataset("Shramik121/tourism-split-dataset")
    rows = pd.DataFrame(splits['test']).sample(2)  # Reduced sample size
    rows = rows.drop(columns=['ProdTaken'], errors='ignore')

    for name in feature_order:
        if name in rows.columns:
            continue
        rows[name] = 0.0 if name in numeric_features else 'Unknown'

    rows[feature_order].to_csv("input_data.csv", index=False)
    logging.info("input_data.csv created")

def deploy():
    """Upload the generated artifacts to a Hugging Face Docker Space.

    The target Space is read from the ``SPACE_NAME`` environment variable
    (default ``Shramik121/tourism-rf-model``) and the access token from
    ``HF_TOKEN``. The Space is created if it does not exist yet.

    Raises:
        FileNotFoundError: If any artifact is missing from the working
            directory. This is checked before any network call, so a failed
            run never leaves the Space with only part of its files updated.
    """
    files = ['app.py', 'model.joblib', 'columns.joblib', 'input_data.csv', 'requirements.txt', 'Dockerfile']

    # Validate everything up front instead of discovering a missing artifact
    # halfway through the upload loop.
    for file in files:
        if not os.path.exists(file):
            logging.error(f"File {file} not found")
            raise FileNotFoundError(f"File {file} not found")

    token = os.getenv("HF_TOKEN")
    if not token:
        # The value is still passed through unchanged; this only makes the
        # missing configuration visible in the logs.
        logging.warning("HF_TOKEN is not set; Hugging Face login may prompt for credentials or fail")
    login(token=token)

    space_name = os.getenv("SPACE_NAME", "Shramik121/tourism-rf-model")
    api = HfApi()
    api.create_repo(repo_id=space_name, repo_type="space", space_sdk="docker", private=False, exist_ok=True)

    for file in files:
        upload_file(path_or_fileobj=file, path_in_repo=file, repo_id=space_name, repo_type="space")
        logging.info(f"Uploaded {file} to {space_name}")

if __name__ == "__main__":
    # Generate every artifact first, then publish; the steps run strictly in
    # this order.
    pipeline = (
        create_dockerfile,
        create_requirements,
        create_app,
        prepare_sample_data,
        deploy,
    )
    for step in pipeline:
        step()