sumitsinha2603 committed on
Commit
9a76ee6
·
1 Parent(s): e758767

Add deployment files (Dockerfile, app, requirements)

Browse files
Files changed (3) hide show
  1. Dockerfile +22 -13
  2. app.py +164 -0
  3. requirements.txt +7 -3
Dockerfile CHANGED
@@ -1,20 +1,29 @@
1
- FROM python:3.13.5-slim
 
2
 
3
- WORKDIR /app
 
 
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
 
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
 
 
13
 
14
- RUN pip3 install -r requirements.txt
 
 
 
15
 
16
- EXPOSE 8501
 
 
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.10-slim

# Do not write .pyc files, and keep stdout/stderr unbuffered so logs show up immediately
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Create working directory
WORKDIR /app

# Install system dependencies (git for huggingface repo usage)
RUN apt-get update && \
    apt-get install -y --no-install-recommends git ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the app so this layer stays cached
# when only app.py changes
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /app/requirements.txt

# Copy application files
COPY app.py /app/app.py
COPY README.md /app/README.md

# Expose port (the app will run on this)
EXPOSE 7860

# app.py only defines the FastAPI instance `app`; it never starts a server on
# its own, so `python app.py` would exit immediately. Serve it with uvicorn
# (already imported by app.py) bound to the exposed port.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import io
4
+ import pandas as pd
5
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
6
+ from pydantic import BaseModel
7
+ from typing import List, Optional, Any
8
+ from huggingface_hub import hf_hub_download, login
9
+ import joblib
10
+ import uvicorn
11
+ from contextlib import asynccontextmanager # Added this import
12
+
13
# Hugging Face Hub location of the serialized model.
MODEL_REPO_ID = "sumitsinha2603/TourismPackagePredictionAnalysisModel"
MODEL_FILENAME = "TourismPackagePredictionAnalysisModel_v1.joblib"
# Read the access token from the environment. The previous
# `userdata.get('hf_token')` relied on a `userdata` object that is never
# imported in this module, so importing the module raised NameError. The unused
# `api = HfApi(...)` client was dropped for the same reason (HfApi was never
# imported and nothing in this file used `api`).
# Both spellings are accepted; the value is None when no token is configured.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("hf_token")
# Directory created by load_model_from_hf() before downloading.
DOWNLOAD_DIR = "/tmp/hf_model"
18
+
19
# -------- Module-level state --------
# The FastAPI application is created further down, once `lifespan` has been
# defined. An earlier `app = FastAPI(...)` call that used to live here was
# rebound before any route was registered on it, so it has been removed.

# Loaded model object; stays None until a loader assigns it.
model = None
# Placeholder mapping; nothing in this module populates it yet.
label_encoders = {}
28
+
29
def ensure_logged_in():
    """Log in to the Hugging Face Hub when a token is configured.

    Does nothing when ``HF_TOKEN`` is empty or ``None``.
    """
    if not HF_TOKEN:
        return
    login(token=HF_TOKEN)
34
+
35
def load_model_from_hf():
    """Download the model file from the HF Hub and load it with joblib.

    The loaded object is stored in the module-level ``model`` and returned.

    Raises:
        RuntimeError: if the download from the Hub fails. Errors raised by
            ``joblib.load`` itself propagate unchanged.
    """
    global model
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    ensure_logged_in()
    try:
        # Download into DOWNLOAD_DIR (previously the directory was created but
        # never used) and get back the full local path of the file.
        local_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            repo_type="model",
            token=HF_TOKEN,
            cache_dir=DOWNLOAD_DIR,
        )
    except Exception as e:
        # Chain the original error so the root cause stays in the traceback.
        raise RuntimeError(f"Failed to download model from HF Hub: {e}") from e

    # SECURITY NOTE: joblib.load unpickles the file, which can execute
    # arbitrary code. Only point MODEL_REPO_ID at a repository you trust.
    model = joblib.load(local_path)
    return model
55
+
56
# Load model on startup
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the model before the app starts serving and log on shutdown.

    A local copy of ``MODEL_FILENAME`` in the working directory is used when it
    exists; otherwise the model is downloaded through ``load_model_from_hf()``.
    If loading fails, the app still starts with ``app.state.model`` set to
    ``None``, so ``/health`` reports it and ``/predict`` can retry the load.
    """
    global model
    print("Loading model...")
    try:
        if os.path.exists(MODEL_FILENAME):
            # SECURITY NOTE: joblib.load unpickles the file; only load trusted files.
            model = joblib.load(MODEL_FILENAME)
        else:
            # The Dockerfile in this commit does not copy the model file into
            # the image, so fall back to fetching it from the Hub.
            model = load_model_from_hf()
    except Exception as e:
        model = None
        print("Warning: model could not be loaded at startup:", e)
    app.state.model = model
    yield
    print("Shutting down...")
65
+
66
# Create the FastAPI application with the lifespan handler attached.
# The route decorators below register their endpoints on this instance.
app = FastAPI(
    title="Tourism Prediction Model Serving",
    description="Load model from HF Hub, accept inputs, return predictions and save inputs to a DataFrame",
    version="0.1",
    lifespan=lifespan # Pass the lifespan context manager here
)
73
+
74
class PredictRequest(BaseModel):
    """JSON request body that carries the input rows under ``records``."""

    # One dict per input row; the list is turned into a DataFrame as-is.
    records: List[dict]
76
+
77
+ # -------- Helper to coerce inputs into DataFrame --------
78
def inputs_to_dataframe_from_file(file: UploadFile) -> pd.DataFrame:
    """Read an uploaded CSV file and parse it into a DataFrame.

    Raises:
        HTTPException: status 400 when the content cannot be parsed as CSV.
    """
    raw_bytes = file.file.read()
    try:
        return pd.read_csv(io.BytesIO(raw_bytes))
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to parse CSV: {e}")
86
+
87
def inputs_to_dataframe_from_json(records: List[dict]) -> pd.DataFrame:
    """Build a DataFrame from a list of record dicts.

    Raises:
        HTTPException: status 400 when pandas cannot build a frame from
            ``records``.
    """
    try:
        return pd.DataFrame(records)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid JSON records: {e}")
93
+
94
+ # -------- Endpoint: predict --------
95
@app.post("/predict")
async def predict(payload: Optional[PredictRequest] = None, file: Optional[UploadFile] = File(None)):
    """
    Provide either:
    - JSON body: {"records": [{...}, {...}]}
    - or upload CSV file as form data
    Returns predictions and the input dataframe saved as CSV inside container.

    When both are supplied, the uploaded file takes precedence.

    Raises:
        HTTPException: 400 when no input is given or the input cannot be
            parsed; 500 when the model cannot be loaded or prediction fails.
    """
    # NOTE(review): FastAPI normally expects a single body encoding per
    # endpoint; confirm that a JSON body is still accepted alongside the
    # File() form parameter declared here.
    if payload is None and file is None:
        raise HTTPException(status_code=400, detail="No input provided. Send JSON 'records' or upload a CSV file.")

    # Convert input to dataframe
    if file is not None:
        df_in = inputs_to_dataframe_from_file(file)
    else:
        df_in = inputs_to_dataframe_from_json(payload.records)

    # app.state.model is only set by the lifespan handler; use getattr so a
    # missing attribute falls through to the lazy load instead of raising.
    current_model = getattr(app.state, "model", None)

    if current_model is None:
        try:
            current_model = load_model_from_hf()
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Model not loaded: {e}") from e
        # Remember the model so later requests (and /health) see it and do
        # not trigger another load.
        app.state.model = current_model

    try:
        preds = current_model.predict(df_in)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Prediction failed: {e}") from e

    # Save inputs
    save_path = os.path.join("/app", "inputs.csv")
    try:
        # Append if file exists
        if os.path.exists(save_path):
            existing = pd.read_csv(save_path)
            newdf = pd.concat([existing, df_in], ignore_index=True)
            newdf.to_csv(save_path, index=False)
        else:
            df_in.to_csv(save_path, index=False)
    except Exception as e:
        # Non-fatal; continue
        print("Warning: failed to save inputs:", e)

    return {
        # Array-like results expose tolist(); fall back to list() otherwise.
        "predictions": preds.tolist() if hasattr(preds, "tolist") else list(preds),
        "n_records": len(df_in),
        "saved_to": save_path
    }
146
+
147
+ # -------- Endpoint: save raw inputs only (optional) --------
148
@app.post("/save_inputs")
async def save_inputs(payload: PredictRequest):
    """Append the posted records to the inputs CSV without running a prediction.

    Returns the CSV path and the number of records received in this request.
    """
    incoming = inputs_to_dataframe_from_json(payload.records)
    target = os.path.join("/app", "inputs.csv")
    if not os.path.exists(target):
        # First write: create the file from the incoming rows only.
        incoming.to_csv(target, index=False)
    else:
        # Re-read what is already stored and write back the combined frame.
        previous = pd.read_csv(target)
        combined = pd.concat([previous, incoming], ignore_index=True)
        combined.to_csv(target, index=False)
    return {"saved_to": target, "n_records": len(incoming)}
159
+
160
+ # -------- Health check --------
161
@app.get("/health")
def health():
    """Report service liveness and whether a model is currently loaded."""
    # app.state.model is only assigned by the lifespan handler; getattr keeps
    # this endpoint from raising AttributeError when it has not run.
    loaded_model = getattr(app.state, "model", None)
    return {"status": "ok", "model_loaded": loaded_model is not None}
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
1
pandas==2.2.2
huggingface_hub==0.32.6
streamlit==1.43.2
joblib==1.5.1
scikit-learn==1.6.0
xgboost==2.1.4
mlflow==3.0.1
# Needed by app.py: web framework, ASGI server, and multipart parsing for file uploads
fastapi==0.115.12
uvicorn==0.34.0
python-multipart==0.0.20