saad1BM commited on
Commit
cc12750
·
verified ·
1 Parent(s): c0e4ab4

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ flights_database.db filter=lfs diff=lfs merge=lfs -text
Dockerfile.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Hugging Face Spaces runs the container as UID 1000. Create a matching
# non-root user that owns /app instead of making the tree world-writable
# with `chmod -R 777`.
RUN useradd -m -u 1000 user \
    && mkdir -p /app \
    && chown user:user /app

WORKDIR /app

# Install requirements first so this layer stays cached when only code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the whole project, owned by the runtime user
COPY --chown=user:user . .

# Drop root privileges for the running application
USER user

# 7860 is the standard port for Hugging Face Spaces
EXPOSE 7860

CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,13 +1,18 @@
1
- ---
2
- title: Flightt
3
- emoji: 📊
4
- colorFrom: green
5
- colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 6.8.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
1
+   pip install -r requirements.txt
2
+
3
+
4
+
5
+   python src/etl.py
6
+
7
+
8
+
9
+   python src/train.py
10
+
11
+   mlflow ui
12
+
13
+
14
+
15
+   uvicorn api.main:app --reload
16
+
17
+  
18
+
docker-compose.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Compose definition with a single service, `api`, built from the Dockerfile
# in this directory.
version: '3.8'
services:
  api:
    build: .
    ports:
      # host:container — 7860 is the port the Dockerfile CMD passes to uvicorn
      - "7860:7860"
    environment:
      # NOTE(review): none of the Python files in this commit read
      # DATABASE_URL; etl.py and train.py hard-code the same SQLite URL.
      - DATABASE_URL=sqlite:///data/flights_database.db
9
+
etl.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sqlalchemy import create_engine
3
+ import os
4
+
5
def run_etl():
    """Load the raw flights CSV, clean and label it, and store a sample in SQLite.

    Steps:
      1. Read the CSV at the hard-coded ``file_path``; if it does not exist,
         print an error and return without touching the database.
      2. Drop rows missing ``ARR_DELAY`` or ``DEP_DELAY`` and, when a
         ``CANCELLED`` column is present, keep only rows where it equals 0.
      3. Add ``is_delayed`` (1 when ``ARR_DELAY`` > 15, else 0).
      4. Write a random sample of at most 10000 rows to the
         ``cleaned_flights`` table, replacing any existing table.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    file_path = "data/T_ONTIME_REPORTING_20260228_131510/flights.csv"

    print("ETL Process now start...")

    # Validate the input before creating any database resources.
    try:
        df = pd.read_csv(file_path, low_memory=False)
    except FileNotFoundError:
        print(f"Error: file not found: {file_path}")
        return
    print(f"Data Loaded successfuly,: {df.shape[0]} rows find it.")

    df = df.dropna(subset=['ARR_DELAY', 'DEP_DELAY'])

    if 'CANCELLED' in df.columns:
        df = df[df['CANCELLED'] == 0]

    # The frame above may be a filtered view of the original; take an explicit
    # copy so the new column is assigned on an independent frame (avoids
    # pandas' chained-assignment / SettingWithCopy hazard).
    df = df.copy()
    df['is_delayed'] = (df['ARR_DELAY'] > 15).astype(int)

    print("Cleaning aur Labeling completed,")

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs('data', exist_ok=True)

    sample_size = min(10000, len(df))
    sample_df = df.sample(n=sample_size)

    # Create the engine only once there is data to write.
    db_engine = create_engine('sqlite:///data/flights_database.db')
    sample_df.to_sql('cleaned_flights', con=db_engine, if_exists='replace', index=False)

    print(f"Data saved to SQL Database,")
    print(f"Database Location: data/flights_database.db")


if __name__ == "__main__":
    run_etl()
flight_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a3c53729b6d3cb927202437d5549ff1af45ed3c7e7ccc4c6e5584f975dcd29d
3
+ size 233999
flights_database.db ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e39c3f94639d22c266a4234fadc72386ba2d492578eb80ad43876c42886b55b1
3
+ size 897024
label_encoder.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:101a2af51a81d8cc66d3c37963007da791aeedb358ca930268f7fd862926c18d
3
+ size 2179
main.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import joblib
3
+ import pandas as pd
4
+ import logging
5
+ from fastapi import FastAPI
6
+ from pydantic import BaseModel
7
+ from datetime import datetime
8
+
9
# LOGGING: write to the console so logs are easy to collect in Docker or
# cloud environments.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

app = FastAPI(title="Flight Delay Prediction API")

# --- PATH SETUP (DOCKER FRIENDLY) ---
# BASE_DIR is one level above the directory holding this file
# (e.g. /app/api/main.py -> /app/).
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(_THIS_DIR)
MODEL_PATH = os.path.join(BASE_DIR, 'models', 'flight_model.joblib')

logging.info(f"Looking for model at: {MODEL_PATH}")

# Best-effort load: on any failure the API still starts with `model` left as
# None, and /predict reports that the model is unavailable.
model = None
try:
    model = joblib.load(MODEL_PATH)
except Exception as e:
    logging.error(f"Model load karne mein masla: {e}")
else:
    logging.info("Model successfully loaded.")
32
+
33
+ # --- DATA SCHEMA ---
34
class FlightData(BaseModel):
    """Request body for ``POST /predict``: the features of a single flight.

    The field names and their order match the ``features`` list in train.py.
    ``predict`` builds a one-row DataFrame from this model, so the declaration
    order here determines the column order passed to the classifier.
    """

    MONTH: int  # presumably calendar month — confirm range against the dataset
    DAY_OF_WEEK: int  # presumably day-of-week index — confirm encoding
    DISTANCE: float  # units not shown in this file — verify against the source data
    CRS_DEP_TIME: int  # presumably scheduled departure time — confirm format
    OP_UNIQUE_CARRIER: str  # categorical; label-encoded in train.py
    ORIGIN: str  # categorical; label-encoded in train.py
    DEST: str  # categorical; label-encoded in train.py
42
+
43
+ # --- ENDPOINTS ---
44
@app.get("/")
def home():
    """Root endpoint: return a static message confirming the API is running."""
    status_message = "Flight Delay Prediction API is Running!"
    return {"message": status_message}
47
+
48
# Categorical request fields that train.py label-encodes before fitting.
_CATEGORICAL_COLUMNS = ('OP_UNIQUE_CARRIER', 'ORIGIN', 'DEST')

# Cache for the {column: LabelEncoder} dict written by train.py.
_ENCODERS = None


def _get_encoders():
    """Return the fitted per-column label encoders, or None if they cannot be loaded.

    The encoders are read from ``models/label_encoders.joblib`` under
    ``BASE_DIR`` (the file train.py writes) and cached after the first
    successful load. A failed load is logged and retried on the next call.
    """
    global _ENCODERS
    if _ENCODERS is None:
        encoders_path = os.path.join(BASE_DIR, 'models', 'label_encoders.joblib')
        try:
            _ENCODERS = joblib.load(encoders_path)
        except Exception as e:
            logging.error(f"Label encoders could not be loaded from {encoders_path}: {e}")
    return _ENCODERS


@app.post("/predict")
def predict(data: FlightData):
    """Predict whether the described flight will be delayed.

    The categorical fields are converted with the same ``LabelEncoder``
    objects that were fitted during training. The previous implementation
    used ``hash(str(x)) % 1000``: Python salts string hashes per process, and
    those values never corresponded to the codes the model was trained on,
    so its predictions were neither meaningful nor stable across restarts.

    Args:
        data: Validated request body holding one flight's features.

    Returns:
        On success, ``{"delay_probability": float, "prediction": str}`` where
        ``prediction`` is ``"Delayed"`` or ``"On Time"``. On failure, a dict
        with an ``"error"`` key (and ``"details"`` where available); the
        dict-based error contract of the original endpoint is preserved.
    """
    if model is None:
        logging.error("Prediction failed: Model is not loaded.")
        return {"error": "Model not loaded on server. Check path and logs."}

    encoders = _get_encoders()
    if encoders is None:
        logging.error("Prediction failed: Label encoders are not loaded.")
        return {"error": "Label encoders not loaded on server. Check path and logs."}

    # Pydantic v2 renamed .dict() to .model_dump(); support both.
    payload = data.model_dump() if hasattr(data, "model_dump") else data.dict()
    logging.info(f"Prediction requested for: {payload}")

    try:
        # One-row frame; column order follows the FlightData field order.
        input_df = pd.DataFrame([payload])

        for col in _CATEGORICAL_COLUMNS:
            encoder = encoders[col]
            value = input_df.at[0, col]
            # LabelEncoder.transform raises on labels it was not fitted on;
            # report that as a clear client-facing error instead.
            if value not in encoder.classes_:
                logging.error(f"Prediction failed: unseen value {value!r} for {col}")
                return {
                    "error": "There is an issue with the prediction process",
                    "details": f"Unknown value {value!r} for field {col}",
                }
            input_df[col] = encoder.transform(input_df[col])

        prediction = model.predict(input_df)[0]
        probability = model.predict_proba(input_df)[0][1]

        result = {
            "delay_probability": round(float(probability), 2),
            "prediction": "Delayed" if prediction == 1 else "On Time",
        }

        logging.info(f"Prediction successful: {result}")
        return result

    except Exception as e:
        # Endpoint boundary: log and return the error rather than crash the request.
        logging.error(f"Prediction error: {str(e)}")
        return {"error": "There is an issue with the prediction process", "details": str(e)}
79
+
80
if __name__ == "__main__":
    import uvicorn
    # Binding to 0.0.0.0 is required for Docker/external access.
    # NOTE(review): this direct-run path listens on port 8000, whereas the
    # Dockerfile CMD starts uvicorn on port 7860.
    uvicorn.run(app, host="0.0.0.0", port=8000)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ scikit-learn
4
+ xgboost
5
+ fastapi
6
+ uvicorn
7
+ mlflow
8
+ joblib
9
+ sqlalchemy
train.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import sqlite3
3
+ from sqlalchemy import create_engine
4
+ from xgboost import XGBClassifier
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.metrics import accuracy_score, roc_auc_score
7
+ from sklearn.preprocessing import LabelEncoder
8
+ import mlflow
9
+ import mlflow.sklearn
10
+ import joblib
11
+ import os
12
+
13
mlflow.set_tracking_uri("sqlite:///mlflow.db")

# Feature columns that must be converted to integer codes before training.
_CATEGORICAL_FEATURES = ['OP_UNIQUE_CARRIER', 'ORIGIN', 'DEST']


def _encode_categoricals(X):
    """Label-encode the categorical columns of ``X`` in place and return the fitted encoders by column."""
    encoders = {}
    for col in _CATEGORICAL_FEATURES:
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col])
        encoders[col] = le
    return encoders


def train_model():
    """Train an XGBoost delay classifier from the ``cleaned_flights`` table.

    Reads the table from ``data/flights_database.db``, label-encodes the
    categorical features, and saves the fitted encoders to
    ``models/label_encoders.joblib``. It then fits an ``XGBClassifier`` on an
    80/20 split (``random_state=42``), logs parameters, metrics and the model
    to the MLflow experiment ``Flight_Delay_Prediction``, and saves the model
    to ``models/flight_model.joblib``.

    If the table cannot be read, an error is printed and the function returns
    without training.

    Returns:
        None.
    """
    print("Model Training started")

    db_path = 'sqlite:///data/flights_database.db'
    engine = create_engine(db_path)

    try:
        df = pd.read_sql('SELECT * FROM cleaned_flights', engine)
    except Exception as e:
        print(f"Error: Database data not found. Check it,: {e}")
        return

    features = ['MONTH', 'DAY_OF_WEEK', 'DISTANCE', 'CRS_DEP_TIME', 'OP_UNIQUE_CARRIER', 'ORIGIN', 'DEST']
    X = df[features].copy()
    y = df['is_delayed']

    encoders = _encode_categoricals(X)

    # Persist the encoders so inference can apply the exact same mapping.
    os.makedirs('models', exist_ok=True)
    joblib.dump(encoders, 'models/label_encoders.joblib')
    print("All Label Encoders saved to models/label_encoders.joblib")

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    mlflow.set_experiment("Flight_Delay_Prediction")

    with mlflow.start_run():
        print("XGBoost Model train")

        # `use_label_encoder` is intentionally omitted: it is deprecated in
        # XGBoost and only produces an "unused parameter" warning on current
        # releases.
        params = {
            "n_estimators": 100,
            "max_depth": 5,
            "learning_rate": 0.1,
            "eval_metric": "logloss",
        }

        model = XGBClassifier(**params)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        y_proba = model.predict_proba(X_test)[:, 1]

        acc = accuracy_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_proba)

        print(f"Accuracy: {acc:.2f}")
        print(f"ROC-AUC: {auc:.2f}")

        mlflow.log_params(params)
        mlflow.log_metric("accuracy", acc)
        mlflow.log_metric("roc_auc", auc)

        mlflow.sklearn.log_model(model, "model")

        joblib.dump(model, 'models/flight_model.joblib')
        print("Model saved: models/flight_model.joblib")


if __name__ == "__main__":
    train_model()