Hrridoyv2 committed on
Commit
9bb9b0e
·
verified ·
1 Parent(s): 03ca089

Upload 5 files

Browse files
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #gradio app
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+ import pickle
6
+ import numpy as np
7
+
8
+ # 1. Load the Model
9
# Deserialize the fitted pipeline once, at import time, so that every
# prediction request reuses the same in-memory object.
# NOTE(review): unpickling runs arbitrary code from the file; only ship a
# .pkl that was produced by a trusted training run.
with open("student_rf_pipeline.pkl", "rb") as model_file:
    model = pickle.load(model_file)
11
+
12
+ # 2. The Logic Function
13
def predict_gpa(gender, age, address, famsize,
                Pstatus, M_Edu, F_Edu, M_Job, F_Job,
                relationship, smoker, tuition_fee, time_friends,
                ssc_result):
    """Predict a student's HSC result from the submitted form values.

    The arguments are packed into a one-row DataFrame whose columns carry
    the same names as the parameters, the module-level ``model`` is asked
    for a prediction, and the first predicted value is clipped to the
    range 0-5 before being formatted.

    Returns:
        A string of the form ``"Predicted HSC Result: X.XX"``.
    """
    # The dict's insertion order fixes the column order of the frame.
    # The keys must match the training CSV's column names exactly.
    features = {
        'gender': gender,
        'age': age,
        'address': address,
        'famsize': famsize,
        'Pstatus': Pstatus,
        'M_Edu': M_Edu,
        'F_Edu': F_Edu,
        'M_Job': M_Job,
        'F_Job': F_Job,
        'relationship': relationship,
        'smoker': smoker,
        'tuition_fee': tuition_fee,
        'time_friends': time_friends,
        'ssc_result': ssc_result,
    }
    input_df = pd.DataFrame([list(features.values())], columns=list(features))

    # predict() returns one value per row; there is exactly one row here.
    raw_prediction = model.predict(input_df)[0]

    # Keep the reported value inside the 0-5 range.
    clipped = np.clip(raw_prediction, 0, 5)
    return f"Predicted HSC Result: {clipped:.2f}"
35
+
36
+ # 3. The App Interface
37
+ # Defining inputs in a list to keep it clean
38
# One input component per predict_gpa parameter, listed in the same order as
# the function's signature.
inputs = [
    gr.Radio(choices=["M", "F"], label="Gender"),
    gr.Number(value=18, label="Age"),
    gr.Radio(choices=["Urban", "Rural"], label="Address"),
    gr.Radio(choices=["GT3", "LE3"], label="Family Size"),
    gr.Radio(choices=["Together", "Apart"], label="Parent Status"),
    gr.Slider(minimum=0, maximum=4, step=1, label="Mother's Edu"),
    gr.Slider(minimum=0, maximum=4, step=1, label="Father's Edu"),
    gr.Dropdown(
        choices=["At_home", "Health", "Other", "Services", "Teacher"],
        label="Mother's Job",
    ),
    gr.Dropdown(
        choices=["Teacher", "Other", "Services", "Health", "Business", "Farmer"],
        label="Father's Job",
    ),
    gr.Radio(choices=["Yes", "No"], label="Relationship"),
    gr.Radio(choices=["Yes", "No"], label="Smoker"),
    gr.Number(label="Tuition Fee"),
    gr.Slider(minimum=1, maximum=5, step=1, label="Time with Friends"),
    gr.Number(label="SSC Result (GPA)"),
]

# Wire the form to the prediction function; the result is shown as plain text.
app = gr.Interface(
    title="HSC Predictor",
    fn=predict_gpa,
    inputs=inputs,
    outputs="text",
)

# Start the web server as soon as the script runs, requesting a share link.
app.launch(share=True)
62
+
63
+
bangladesh_student_performance_2018.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ annotated-doc==0.0.4
3
+ annotated-types==0.7.0
4
+ anyio==4.12.1
5
+ audioop-lts==0.2.2
6
+ brotli==1.2.0
7
+ certifi==2026.1.4
8
+ click==8.3.1
9
+ colorama==0.4.6
10
+ fastapi==0.128.0
11
+ ffmpy==1.0.0
12
+ filelock==3.20.3
13
+ fsspec==2026.1.0
14
+ gradio==6.3.0
15
+ gradio_client==2.0.3
16
+ groovy==0.1.2
17
+ h11==0.16.0
18
+ hf-xet==1.2.0
19
+ httpcore==1.0.9
20
+ httpx==0.28.1
21
+ huggingface_hub==1.3.1
22
+ idna==3.11
23
+ Jinja2==3.1.6
24
+ joblib==1.5.3
25
+ markdown-it-py==4.0.0
26
+ MarkupSafe==3.0.3
27
+ mdurl==0.1.2
28
+ numpy==2.4.1
29
+ orjson==3.11.5
30
+ packaging==25.0
31
+ pandas==2.3.3
32
+ pillow==12.1.0
33
+ pydantic==2.12.5
34
+ pydantic_core==2.41.5
35
+ pydub==0.25.1
36
+ Pygments==2.19.2
37
+ python-dateutil==2.9.0.post0
38
+ python-multipart==0.0.21
39
+ pytz==2025.2
40
+ PyYAML==6.0.3
41
+ rich==14.2.0
42
+ safehttpx==0.1.7
43
+ scikit-learn==1.8.0
44
+ scipy==1.17.0
45
+ semantic-version==2.10.0
46
+ shellingham==1.5.4
47
+ six==1.17.0
48
+ starlette==0.50.0
49
+ threadpoolctl==3.6.0
50
+ tomlkit==0.13.3
51
+ tqdm==4.67.1
52
+ typer==0.21.1
53
+ typer-slim==0.21.1
54
+ typing-inspection==0.4.2
55
+ typing_extensions==4.15.0
56
+ tzdata==2025.3
57
+ uvicorn==0.40.0
rf_train.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import pandas as pd
3
+ import numpy as np
4
+ import pickle
5
+
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
8
+ from sklearn.impute import SimpleImputer
9
+ from sklearn.compose import ColumnTransformer
10
+ from sklearn.pipeline import Pipeline
11
+ from sklearn.ensemble import RandomForestRegressor
12
+ from sklearn.metrics import mean_squared_error, r2_score
13
+
14
+ # =====================
15
+ # Load dataset
16
+ # =====================
17
df = pd.read_csv("bangladesh_student_performance_2018.csv")

# Echo the loaded frame as a quick check of what was read.
print(df)

# The date column is not used for training; discard it when it is present.
df = df.drop(columns=['date'], errors='ignore')

# Separate the regression target from the predictor columns.
y = df['hsc_result']
X = df.drop(columns=['hsc_result'])

# ---------------------
# Column split by dtype
# ---------------------
# NOTE(review): only int64/float64 and object columns are selected here; a
# column of any other dtype is left out of both lists and, with
# ColumnTransformer's default remainder, would not reach the model. Confirm
# that the CSV contains no such columns.
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

# ---------------------
# Preprocessing
# ---------------------
# Numeric branch: fill gaps with the median, then standardize.
num_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at predict time.
cat_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
])

preprocessor = ColumnTransformer([
    ('num', num_transformer, numeric_features),
    ('cat', cat_transformer, categorical_features),
])

# ---------------------
# Model and full pipeline
# ---------------------
rf_model = RandomForestRegressor(
    n_estimators=200,
    max_depth=10,
    min_samples_split=2,
    n_jobs=-1,
    random_state=42,
)

# Bundling preprocessing with the estimator means the saved artifact accepts
# raw feature rows directly.
rf_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', rf_model),
])

# ---------------------
# Train / evaluate
# ---------------------
# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

rf_pipeline.fit(X_train, y_train)

y_pred = rf_pipeline.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse:.4f}")
print(f"R2 Score: {r2:.4f}")

# ---------------------
# Persist the fitted pipeline
# ---------------------
with open("student_rf_pipeline.pkl", "wb") as f:
    pickle.dump(rf_pipeline, f)

print("✅ Random Forest pipeline saved as student_rf_pipeline.pkl")
student_rf_pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc8c9af2c107dd2061c705653d0f9b8d73aaf87d31f432249968114c097e7136
3
+ size 14727571