# tourism-rf-model / src/deploy.py
# Uploads the trained model and application files to a Hugging Face Space.
import os
import pandas as pd
import joblib
from huggingface_hub import HfApi, login, upload_file
from datasets import load_dataset
import logging
# Root logger configuration shared by every helper in this script.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def create_dockerfile():
    """Write the Dockerfile the Hugging Face Space uses to build the container."""
    dockerfile_contents = '''
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY app.py .
COPY model.joblib .
COPY columns.joblib .
COPY input_data.csv .
EXPOSE 7860
CMD ["waitress-serve", "--host=0.0.0.0", "--port=7860", "--threads=4", "--call", "app:app"]
'''
    # Single write; the Space build picks this file up from the repo root.
    with open("Dockerfile", "w") as dockerfile:
        dockerfile.write(dockerfile_contents)
    logging.info("Dockerfile created")
def create_requirements():
    """Write the pinned runtime dependencies for the Space container."""
    requirements = '''
numpy==1.26.4
pandas==2.2.2
scikit-learn==1.6.1
joblib==1.4.2
dill==0.3.8
flask==3.0.3
waitress==3.0.0
'''
    with open("requirements.txt", "w") as req_file:
        req_file.write(requirements)
    logging.info("requirements.txt created")
def create_app():
    """Write the Flask prediction service (app.py) run inside the Space container.

    The generated app loads the serialized model and training-column list,
    exposes GET /health and POST /predict, and aligns each incoming JSON
    record to the training columns (defaults for missing fields, one-hot
    encoding, reindex) before calling the model.
    """
    app_source = '''
from flask import Flask, request, jsonify
import pandas as pd
import joblib
import logging
import os

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Resolve artifacts relative to /app inside the container, cwd elsewhere.
base_dir = '/app' if os.path.exists('/app') else os.getcwd()
model = joblib.load(os.path.join(base_dir, "model.joblib"))
columns = joblib.load(os.path.join(base_dir, "columns.joblib"))

required_columns = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                    'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                    'NumberOfChildrenVisiting', 'MonthlyIncome', 'TypeofContact',
                    'Occupation', 'Gender', 'ProductPitched', 'MaritalStatus',
                    'Designation', 'CityTier']


@app.route('/health', methods=['GET'])
def health():
    return jsonify({'status': 'healthy'})


@app.route('/predict', methods=['POST'])
def predict():
    try:
        data = request.get_json(force=True)
        # Accept either a single record (dict) or a list of records.
        input_data = [data] if isinstance(data, dict) else data
        input_df = pd.DataFrame(input_data)
        num_cols = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                    'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                    'NumberOfChildrenVisiting', 'MonthlyIncome']
        cat_cols = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
                    'MaritalStatus', 'Designation', 'CityTier']
        # Default any missing feature so column alignment below cannot fail.
        for col in required_columns:
            if col not in input_df.columns:
                input_df[col] = 0.0 if col in num_cols else 'Unknown'
        # Median-impute numerics; fall back to 0.0 when the median itself is
        # NaN (e.g. a one-row request with a null value), otherwise NaN would
        # reach model.predict and fail the request.
        input_df[num_cols] = input_df[num_cols].astype(float).fillna(input_df[num_cols].median()).fillna(0.0)
        input_df[cat_cols] = input_df[cat_cols].fillna('Unknown')
        input_encoded = pd.get_dummies(input_df, columns=cat_cols, drop_first=True)
        # reindex both adds missing training columns (filled with 0) and drops
        # extras, so no manual column loop is needed.
        input_encoded = input_encoded.reindex(columns=columns, fill_value=0)
        prediction = model.predict(input_encoded)
        return jsonify({'prediction': prediction.tolist()})
    except Exception as e:
        logger.error(f"Prediction failed: {str(e)}")
        return jsonify({'error': str(e)}), 400


if __name__ == "__main__":
    from waitress import serve
    serve(app, host='0.0.0.0', port=7860, threads=4)
'''
    with open("app.py", "w") as f:
        f.write(app_source)
    logging.info("app.py created")
def prepare_sample_data():
    """Draw a tiny sample from the hosted test split and write input_data.csv."""
    num_cols = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                'NumberOfChildrenVisiting', 'MonthlyIncome']
    cat_cols = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
                'MaritalStatus', 'Designation', 'CityTier']
    # Same feature order the serving app expects: numerics first, then categoricals.
    required_columns = num_cols + cat_cols
    dataset = load_dataset("Shramik121/tourism-split-dataset")
    sample_df = pd.DataFrame(dataset['test']).sample(2)  # Reduced sample size
    # Drop the target if present; the sample file holds features only.
    sample_df.drop(columns=['ProdTaken'], inplace=True, errors='ignore')
    # Backfill any feature missing from the split with a neutral default.
    for column in required_columns:
        if column not in sample_df.columns:
            sample_df[column] = 0.0 if column in num_cols else 'Unknown'
    sample_df = sample_df[required_columns]
    sample_df.to_csv("input_data.csv", index=False)
    logging.info("input_data.csv created")
def deploy():
    """Create (or reuse) the Docker Space and upload every deployment artifact.

    Raises FileNotFoundError if any expected artifact is missing locally.
    """
    login(token=os.getenv("HF_TOKEN"))
    space_name = os.getenv("SPACE_NAME", "Shramik121/tourism-rf-model")
    api = HfApi()
    # exist_ok makes redeploys idempotent.
    api.create_repo(repo_id=space_name, repo_type="space", space_sdk="docker",
                    private=False, exist_ok=True)
    artifacts = ['app.py', 'model.joblib', 'columns.joblib',
                 'input_data.csv', 'requirements.txt', 'Dockerfile']
    for artifact in artifacts:
        # Guard clause: fail fast before uploading a partial set of files.
        if not os.path.exists(artifact):
            logging.error(f"File {artifact} not found")
            raise FileNotFoundError(f"File {artifact} not found")
        upload_file(path_or_fileobj=artifact, path_in_repo=artifact,
                    repo_id=space_name, repo_type="space")
        logging.info(f"Uploaded {artifact} to {space_name}")
def _main():
    """Generate all deployment artifacts, then push them to the Space."""
    create_dockerfile()
    create_requirements()
    create_app()
    prepare_sample_data()
    deploy()


if __name__ == "__main__":
    _main()