import os
import pandas as pd
import joblib
from huggingface_hub import HfApi, login, upload_file
from datasets import load_dataset
import logging

# Configure the root logger once for the whole script: INFO level and
# "timestamp - level - message" formatted lines.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def create_dockerfile():
    """Write the ``Dockerfile`` for the prediction service into the current directory.

    The image is based on ``python:3.12-slim``, installs ``requirements.txt``,
    copies ``app.py``, ``model.joblib``, ``columns.joblib`` and
    ``input_data.csv`` into ``/app``, exposes port 7860 and starts the app
    with ``waitress-serve``.

    Any existing ``Dockerfile`` in the working directory is overwritten.
    """
    # NOTE: ``app:app`` refers to the Flask *instance* defined in app.py, so
    # waitress-serve must NOT be given ``--call``. That flag tells waitress to
    # invoke the target with no arguments to obtain the WSGI app, which fails
    # for an already-constructed Flask object and crashes the container.
    with open("Dockerfile", "w") as f:
        f.write('''
FROM python:3.12-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY app.py .
COPY model.joblib .
COPY columns.joblib .
COPY input_data.csv .
EXPOSE 7860
CMD ["waitress-serve", "--host=0.0.0.0", "--port=7860", "--threads=4", "app:app"]
''')
    logging.info("Dockerfile created")

def create_requirements():
    """Write ``requirements.txt`` with the pinned runtime dependencies.

    The file is created in the current working directory (overwriting any
    existing one). It starts with an empty line, lists one pinned package
    per line and ends with a trailing newline.
    """
    pinned_packages = (
        "numpy==1.26.4",
        "pandas==2.2.2",
        "scikit-learn==1.6.1",
        "joblib==1.4.2",
        "dill==0.3.8",
        "flask==3.0.3",
        "waitress==3.0.0",
    )
    # Leading and trailing newline are kept so the output is unchanged.
    contents = "\n" + "\n".join(pinned_packages) + "\n"
    with open("requirements.txt", "w") as handle:
        handle.write(contents)
    logging.info("requirements.txt created")

def create_app():
    """Write ``app.py``, the Flask prediction service, into the current directory.

    The generated module loads ``model.joblib`` and ``columns.joblib`` at
    import time and exposes two routes:

    * ``GET /health``: returns ``{"status": "healthy"}``.
    * ``POST /predict``: accepts one JSON record (object) or a list of
      records, fills in missing columns/values, one-hot encodes the
      categorical columns, aligns the frame to the saved column list and
      returns ``{"prediction": [...]}``. Any failure is logged and answered
      with HTTP 400 and ``{"error": "<message>"}``.

    Any existing ``app.py`` in the working directory is overwritten.
    """
    # The generated source is delimited with triple single quotes, so the
    # embedded code must only ever use triple *double* quotes internally.
    with open("app.py", "w") as f:
        f.write('''
from flask import Flask, request, jsonify
import pandas as pd
import joblib
import logging
import os

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

app = Flask(__name__)
base_dir = '/app' if os.path.exists('/app') else os.getcwd()
model = joblib.load(os.path.join(base_dir, "model.joblib"))
columns = joblib.load(os.path.join(base_dir, "columns.joblib"))

num_cols = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
            'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
            'NumberOfChildrenVisiting', 'MonthlyIncome']
cat_cols = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
            'MaritalStatus', 'Designation', 'CityTier']
required_columns = num_cols + cat_cols

@app.route('/health', methods=['GET'])
def health():
    """Liveness probe."""
    return jsonify({'status': 'healthy'})

@app.route('/predict', methods=['POST'])
def predict():
    """Predict for one JSON record or a list of JSON records."""
    try:
        data = request.get_json(force=True)
        input_data = [data] if isinstance(data, dict) else data
        input_df = pd.DataFrame(input_data)
        for col in required_columns:
            if col not in input_df.columns:
                input_df[col] = 0.0 if col in num_cols else 'Unknown'
        numeric = input_df[num_cols].astype(float)
        # Impute with the batch median; if a column has no value at all in
        # this request (e.g. a single record with a null) the median is NaN,
        # so fall back to 0.0, the same default used for absent columns.
        input_df[num_cols] = numeric.fillna(numeric.median()).fillna(0.0)
        input_df[cat_cols] = input_df[cat_cols].fillna('Unknown')
        # Encode every level present in the request. Dropping the first level
        # here would depend on which values happen to be in this payload (a
        # single record would lose all of its categories). The reindex below
        # keeps exactly the columns the model was fitted on and zero-fills
        # the ones this request did not produce.
        input_encoded = pd.get_dummies(input_df, columns=cat_cols, drop_first=False)
        input_encoded = input_encoded.reindex(columns=columns, fill_value=0)
        prediction = model.predict(input_encoded)
        return jsonify({'prediction': prediction.tolist()})
    except Exception as e:
        logger.exception("Prediction failed: %s", e)
        return jsonify({'error': str(e)}), 400

if __name__ == "__main__":
    from waitress import serve
    serve(app, host='0.0.0.0', port=7860, threads=4)
''')
    logging.info("app.py created")

def prepare_sample_data():
    """Write ``input_data.csv`` with two random rows from the dataset's test split.

    Loads ``Shramik121/tourism-split-dataset``, samples two rows of its
    ``test`` split, removes the ``ProdTaken`` column when present, adds any
    missing expected column (``0.0`` for numeric ones, ``'Unknown'``
    otherwise) and writes the expected columns, in a fixed order, to
    ``input_data.csv`` without the index.
    """
    numeric_features = ['Age', 'DurationOfPitch', 'NumberOfPersonVisiting', 'NumberOfFollowups',
                        'PreferredPropertyStar', 'NumberOfTrips', 'PitchSatisfactionScore',
                        'NumberOfChildrenVisiting', 'MonthlyIncome']
    categorical_features = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
                            'MaritalStatus', 'Designation', 'CityTier']
    # Output column order: numeric features first, then categorical ones.
    expected = numeric_features + categorical_features

    splits = load_dataset("Shramik121/tourism-split-dataset")
    rows = pd.DataFrame(splits['test']).sample(2)  # Reduced sample size
    rows = rows.drop(columns=['ProdTaken'], errors='ignore')

    for name in expected:
        if name in rows.columns:
            continue
        rows[name] = 0.0 if name in numeric_features else 'Unknown'

    rows[expected].to_csv("input_data.csv", index=False)
    logging.info("input_data.csv created")

def deploy():
    """Upload the generated artifacts to a Hugging Face Docker Space.

    The Space id is read from the ``SPACE_NAME`` environment variable
    (default ``Shramik121/tourism-rf-model``) and the access token from
    ``HF_TOKEN``. The Space is created if it does not exist yet, then each
    artifact is uploaded under its own file name.

    Raises:
        FileNotFoundError: if any artifact is missing from the current
            directory. This is checked *before* logging in or touching the
            Space, so a missing file never leaves a half-deployed Space.
    """
    files = ['app.py', 'model.joblib', 'columns.joblib', 'input_data.csv', 'requirements.txt', 'Dockerfile']

    # Validate everything up front: failing mid-loop would leave the Space
    # with only some of the files uploaded.
    missing = [name for name in files if not os.path.exists(name)]
    if missing:
        for name in missing:
            logging.error(f"File {name} not found")
        raise FileNotFoundError(f"File {missing[0]} not found")

    # NOTE(review): HF_TOKEN may be unset, in which case None is passed to
    # login(); confirm the resulting behaviour is acceptable where this runs.
    login(token=os.getenv("HF_TOKEN"))
    space_name = os.getenv("SPACE_NAME", "Shramik121/tourism-rf-model")
    api = HfApi()
    api.create_repo(repo_id=space_name, repo_type="space", space_sdk="docker", private=False, exist_ok=True)
    for file in files:
        upload_file(path_or_fileobj=file, path_in_repo=file, repo_id=space_name, repo_type="space")
        logging.info(f"Uploaded {file} to {space_name}")

if __name__ == "__main__":
    # Generate every artifact first, then publish; order matters because
    # deploy() uploads the files the earlier steps write.
    pipeline = (
        create_dockerfile,
        create_requirements,
        create_app,
        prepare_sample_data,
        deploy,
    )
    for step in pipeline:
        step()