# tourism-rf-model / src/deploy.py
# Uploads the trained model and application files to a Hugging Face Space.
import os
import pandas as pd
import joblib
from huggingface_hub import HfApi, login, upload_file
from datasets import load_dataset
import logging
# Root logger configuration shared by every helper in this script.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def create_dockerfile():
    """Write the Dockerfile the Hugging Face Space uses to build the container."""
    dockerfile_contents = '''
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY app.py .
COPY model.joblib .
COPY columns.joblib .
COPY input_data.csv .
EXPOSE 7860
CMD ["waitress-serve", "--host=0.0.0.0", "--port=7860", "--threads=4", "--call", "app:app"]
'''
    # Single write; the Space build picks this file up from the repo root.
    with open("Dockerfile", "w") as dockerfile:
        dockerfile.write(dockerfile_contents)
    logging.info("Dockerfile created")
def create_requirements():
    """Write the pinned runtime dependencies for the Space container."""
    requirements = '''
numpy==1.26.4
pandas==2.2.2
scikit-learn==1.6.1
joblib==1.4.2
dill==0.3.8
flask==3.0.3
waitress==3.0.0
'''
    with open("requirements.txt", "w") as req_file:
        req_file.write(requirements)
    logging.info("requirements.txt created")
def create_app():
    """Write the Flask prediction service (app.py) run inside the Space container.

    The generated app loads the serialized model and training-column list,
    exposes GET /health and POST /predict, and aligns each incoming JSON
    record to the training columns (defaults for missing fields, one-hot
    encoding, reindex) before calling the model.
    """
    app_source = '''
from flask import Flask, request, jsonify
import pandas as pd
import joblib
import logging
import os

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Resolve artifacts relative to /app inside the container, cwd elsewhere.
base_dir = '/app' if os.path.exists('/app') else os.getcwd()
model = joblib.load(os.path.join(base_dir, "model.joblib"))
columns = joblib.load(os.path.join(base_dir, "columns.joblib"))

required_columns = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                    'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                    'NumberOfChildrenVisiting', 'MonthlyIncome', 'TypeofContact',
                    'Occupation', 'Gender', 'ProductPitched', 'MaritalStatus',
                    'Designation', 'CityTier']


@app.route('/health', methods=['GET'])
def health():
    return jsonify({'status': 'healthy'})


@app.route('/predict', methods=['POST'])
def predict():
    try:
        data = request.get_json(force=True)
        # Accept either a single record (dict) or a list of records.
        input_data = [data] if isinstance(data, dict) else data
        input_df = pd.DataFrame(input_data)
        num_cols = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                    'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                    'NumberOfChildrenVisiting', 'MonthlyIncome']
        cat_cols = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
                    'MaritalStatus', 'Designation', 'CityTier']
        # Default any missing feature so column alignment below cannot fail.
        for col in required_columns:
            if col not in input_df.columns:
                input_df[col] = 0.0 if col in num_cols else 'Unknown'
        # Median-impute numerics; fall back to 0.0 when the median itself is
        # NaN (e.g. a one-row request with a null value), otherwise NaN would
        # reach model.predict and fail the request.
        input_df[num_cols] = input_df[num_cols].astype(float).fillna(input_df[num_cols].median()).fillna(0.0)
        input_df[cat_cols] = input_df[cat_cols].fillna('Unknown')
        input_encoded = pd.get_dummies(input_df, columns=cat_cols, drop_first=True)
        # reindex both adds missing training columns (filled with 0) and drops
        # extras, so no manual column loop is needed.
        input_encoded = input_encoded.reindex(columns=columns, fill_value=0)
        prediction = model.predict(input_encoded)
        return jsonify({'prediction': prediction.tolist()})
    except Exception as e:
        logger.error(f"Prediction failed: {str(e)}")
        return jsonify({'error': str(e)}), 400


if __name__ == "__main__":
    from waitress import serve
    serve(app, host='0.0.0.0', port=7860, threads=4)
'''
    with open("app.py", "w") as f:
        f.write(app_source)
    logging.info("app.py created")
def prepare_sample_data():
    """Draw a tiny sample from the hosted test split and write input_data.csv."""
    num_cols = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                'NumberOfChildrenVisiting', 'MonthlyIncome']
    cat_cols = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
                'MaritalStatus', 'Designation', 'CityTier']
    # Same feature order the serving app expects: numerics first, then categoricals.
    required_columns = num_cols + cat_cols
    dataset = load_dataset("Shramik121/tourism-split-dataset")
    sample_df = pd.DataFrame(dataset['test']).sample(2)  # Reduced sample size
    # Drop the target if present; the sample file holds features only.
    sample_df.drop(columns=['ProdTaken'], inplace=True, errors='ignore')
    # Backfill any feature missing from the split with a neutral default.
    for column in required_columns:
        if column not in sample_df.columns:
            sample_df[column] = 0.0 if column in num_cols else 'Unknown'
    sample_df = sample_df[required_columns]
    sample_df.to_csv("input_data.csv", index=False)
    logging.info("input_data.csv created")
def deploy():
    """Create (or reuse) the Docker Space and upload every deployment artifact.

    Raises FileNotFoundError if any expected artifact is missing locally.
    """
    login(token=os.getenv("HF_TOKEN"))
    space_name = os.getenv("SPACE_NAME", "Shramik121/tourism-rf-model")
    api = HfApi()
    # exist_ok makes redeploys idempotent.
    api.create_repo(repo_id=space_name, repo_type="space", space_sdk="docker",
                    private=False, exist_ok=True)
    artifacts = ['app.py', 'model.joblib', 'columns.joblib',
                 'input_data.csv', 'requirements.txt', 'Dockerfile']
    for artifact in artifacts:
        # Guard clause: fail fast before uploading a partial set of files.
        if not os.path.exists(artifact):
            logging.error(f"File {artifact} not found")
            raise FileNotFoundError(f"File {artifact} not found")
        upload_file(path_or_fileobj=artifact, path_in_repo=artifact,
                    repo_id=space_name, repo_type="space")
        logging.info(f"Uploaded {artifact} to {space_name}")
def _main():
    """Generate all deployment artifacts, then push them to the Space."""
    create_dockerfile()
    create_requirements()
    create_app()
    prepare_sample_data()
    deploy()


if __name__ == "__main__":
    _main()