# Hugging Face Space status when this file was captured: Runtime error
| import os | |
| import pandas as pd | |
| import joblib | |
| from huggingface_hub import HfApi, login, upload_file | |
| from datasets import load_dataset | |
| import logging | |
# Configure the root logger once at import time so every step below reports
# timestamped, level-tagged progress messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
def create_dockerfile():
    """Write the Dockerfile for the Space image to ``Dockerfile``.

    The generated image is based on ``python:3.12-slim``, installs
    ``requirements.txt``, copies ``app.py``, ``model.joblib``,
    ``columns.joblib`` and ``input_data.csv`` into ``/app``, exposes port 7860
    and starts the service with ``waitress-serve``.

    The file is written to the current working directory and overwrites any
    existing ``Dockerfile``.
    """
    # NOTE: the CMD deliberately has no ``--call`` flag. ``--call`` tells
    # waitress-serve to invoke the target with no arguments to *obtain* the
    # WSGI application (application-factory style). ``app:app`` already names
    # the WSGI application object itself, so calling it without
    # ``environ``/``start_response`` raises at container start-up.
    with open("Dockerfile", "w", encoding="utf-8") as f:
        f.write('''
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY app.py .
COPY model.joblib .
COPY columns.joblib .
COPY input_data.csv .
EXPOSE 7860
CMD ["waitress-serve", "--host=0.0.0.0", "--port=7860", "--threads=4", "app:app"]
''')
    logging.info("Dockerfile created")
def create_requirements():
    """Write the pinned Python dependencies of the Space to ``requirements.txt``.

    The file is created in the current working directory, replacing any
    existing one, with one ``package==version`` pin per line.
    """
    pinned = (
        "numpy==1.26.4",
        "pandas==2.2.2",
        "scikit-learn==1.6.1",
        "joblib==1.4.2",
        "dill==0.3.8",
        "flask==3.0.3",
        "waitress==3.0.0",
    )
    # The file starts with an empty line and ends with a trailing newline.
    body = "\n" + "\n".join(pinned) + "\n"
    with open("requirements.txt", "w") as handle:
        handle.write(body)
    logging.info("requirements.txt created")
def create_app():
    """Write the Flask inference service to ``app.py`` in the current directory.

    The generated module loads ``model.joblib`` and ``columns.joblib`` at
    import time (from ``/app`` when that directory exists, otherwise from the
    working directory) and exposes two routes:

    * ``GET /health`` returns ``{"status": "healthy"}``.
    * ``POST /predict`` accepts one JSON object or a list of objects, fills in
      missing feature columns, one-hot encodes the categorical features,
      aligns the frame to the saved training columns and returns
      ``{"prediction": [...]}``. Any failure is logged and answered with
      ``{"error": "..."}`` and HTTP 400.

    Differences from a naive request-time encoding that matter for
    correctness:

    * Categorical features are encoded with ``drop_first=False`` and then
      aligned with ``reindex``. Dropping the first level on the request frame
      would remove the only observed category of every feature for a
      single-row request, zeroing all one-hot columns.
    * Numeric columns are converted to float before the median is taken, and
      columns that are entirely missing fall back to 0.0 (the same default
      used for absent columns).

    Any existing ``app.py`` is overwritten.
    """
    with open("app.py", "w", encoding="utf-8") as f:
        f.write('''
from flask import Flask, request, jsonify
import pandas as pd
import joblib
import logging
import os

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = Flask(__name__)

base_dir = '/app' if os.path.exists('/app') else os.getcwd()
model = joblib.load(os.path.join(base_dir, "model.joblib"))
columns = joblib.load(os.path.join(base_dir, "columns.joblib"))

num_cols = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
            'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
            'NumberOfChildrenVisiting', 'MonthlyIncome']
cat_cols = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
            'MaritalStatus', 'Designation', 'CityTier']
required_columns = num_cols + cat_cols


@app.route('/health', methods=['GET'])
def health():
    return jsonify({'status': 'healthy'})


@app.route('/predict', methods=['POST'])
def predict():
    try:
        data = request.get_json(force=True)
        input_data = [data] if isinstance(data, dict) else data
        input_df = pd.DataFrame(input_data)
        for col in required_columns:
            if col not in input_df.columns:
                input_df[col] = 0.0 if col in num_cols else 'Unknown'
        # Convert first so the median is computed on floats; a column with no
        # usable value has a NaN median, so fall back to 0.0 afterwards.
        numeric = input_df[num_cols].astype(float)
        input_df[num_cols] = numeric.fillna(numeric.median()).fillna(0.0)
        input_df[cat_cols] = input_df[cat_cols].fillna('Unknown')
        # Encode every observed category and let reindex keep only the columns
        # the model was trained on; unseen or reference levels are discarded
        # and absent ones are filled with 0.
        input_encoded = pd.get_dummies(input_df, columns=cat_cols, drop_first=False)
        input_encoded = input_encoded.reindex(columns=columns, fill_value=0)
        prediction = model.predict(input_encoded)
        return jsonify({'prediction': prediction.tolist()})
    except Exception as e:
        logger.exception("Prediction failed: %s", e)
        return jsonify({'error': str(e)}), 400


if __name__ == "__main__":
    from waitress import serve
    serve(app, host='0.0.0.0', port=7860, threads=4)
''')
    logging.info("app.py created")
def prepare_sample_data():
    """Write two randomly sampled rows of the test split to ``input_data.csv``.

    Loads the ``Shramik121/tourism-split-dataset`` dataset, samples two rows
    from its ``test`` split, drops the ``ProdTaken`` column when present, adds
    any missing feature column with a default (``0.0`` for numeric features,
    ``'Unknown'`` for categorical ones) and writes the features, numeric ones
    first, to ``input_data.csv`` without the index.
    """
    numeric_features = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                        'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                        'NumberOfChildrenVisiting', 'MonthlyIncome']
    categorical_features = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
                            'MaritalStatus', 'Designation', 'CityTier']
    # Column order of the CSV: all numeric features, then all categorical ones.
    feature_order = numeric_features + categorical_features

    splits = load_dataset("Shramik121/tourism-split-dataset")
    # Two rows are enough for a smoke-test payload.
    rows = pd.DataFrame(splits['test']).sample(2)
    rows = rows.drop(columns=['ProdTaken'], errors='ignore')

    absent = [name for name in feature_order if name not in rows.columns]
    for name in absent:
        rows[name] = 0.0 if name in numeric_features else 'Unknown'

    rows[feature_order].to_csv("input_data.csv", index=False)
    logging.info("input_data.csv created")
def deploy():
    """Create (or reuse) the Docker Space and upload the deployment bundle.

    Reads the access token from the ``HF_TOKEN`` environment variable and the
    target Space id from ``SPACE_NAME`` (default
    ``Shramik121/tourism-rf-model``). The Space is created as a public
    ``docker`` Space if it does not exist yet, then each bundle file is
    uploaded from the current working directory to the same path in the repo.

    Raises:
        FileNotFoundError: if any bundle file is missing. The check runs
            before login, repo creation or any upload, so a missing file can
            never leave the Space with a partially updated bundle.
    """
    files = ['app.py', 'model.joblib', 'columns.joblib', 'input_data.csv', 'requirements.txt', 'Dockerfile']

    # Validate the complete bundle up front: checking inside the upload loop
    # would push the earlier files before failing on a later one.
    for file in files:
        if not os.path.exists(file):
            logging.error(f"File {file} not found")
            raise FileNotFoundError(f"File {file} not found")

    login(token=os.getenv("HF_TOKEN"))
    space_name = os.getenv("SPACE_NAME", "Shramik121/tourism-rf-model")
    api = HfApi()
    api.create_repo(repo_id=space_name, repo_type="space", space_sdk="docker", private=False, exist_ok=True)

    for file in files:
        upload_file(path_or_fileobj=file, path_in_repo=file, repo_id=space_name, repo_type="space")
        logging.info(f"Uploaded {file} to {space_name}")
if __name__ == "__main__":
    # Generate every build artifact first, then push the bundle to the Space.
    # The steps run strictly in this order.
    pipeline = (
        create_dockerfile,
        create_requirements,
        create_app,
        prepare_sample_data,
        deploy,
    )
    for step in pipeline:
        step()