Spaces:
Build error
Build error
| import pandas as pd | |
| import numpy as np | |
| import gradio as gr | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import LabelEncoder | |
| from sklearn.ensemble import RandomForestClassifier | |
| import joblib | |
| import re | |
| # Load and preprocess data | |
def load_and_preprocess_data(filename):
    """Read the admissions CSV and label-encode its categorical columns.

    Args:
        filename: Path of the CSV file to load.

    Returns:
        A 4-tuple ``(X, y_college_branch, label_encoders, df)``:
        the feature columns, the two target columns ("College Name" and
        "Branch"), a dict mapping each encoded column name to its fitted
        ``LabelEncoder``, and the full frame with the categorical columns
        replaced by their integer codes.
    """
    categorical_columns = ("College Name", "Category", "Gender", "Branch", "Region")
    feature_columns = ["Category", "Gender", "Opening Rank", "Closing Rank", "Region"]
    target_columns = ["College Name", "Branch"]

    frame = pd.read_csv(filename)

    # One encoder per column, kept so the integer codes can be mapped back
    # to the original labels later.
    encoders = {}
    for column in categorical_columns:
        encoder = LabelEncoder()
        frame[column] = encoder.fit_transform(frame[column])
        encoders[column] = encoder

    features = frame[feature_columns]
    targets = frame[target_columns]
    return features, targets, encoders, frame
# Dataset shipped alongside the app; loaded once at import time.
filename = "AP_EAMCET_Engineering_10000 (1).csv"

# `df` keeps the label-encoded frame that predict_colleges filters on.
(X, y_college_branch, label_encoders, df) = load_and_preprocess_data(filename)
| # Train model | |
def train_model(X, y):
    """Fit a random forest on an 80% training split of the data.

    Args:
        X: Feature table.
        y: Target table, passed to the classifier unchanged.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # The held-out 20% is produced by the split but never used here.
    features_train, _, targets_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    forest.fit(features_train, targets_train)
    return forest
# Train once at start-up on the encoded dataset.
college_branch_model = train_model(X, y_college_branch)

# Persist the model and the encoders; predict_colleges reloads the encoders
# from "label_encoders.pkl" on every call.
joblib.dump(value=college_branch_model, filename="college_branch_model.pkl")
joblib.dump(value=label_encoders, filename="label_encoders.pkl")
| # Prediction function | |
def predict_colleges(category, gender, rank, region):
    """List the colleges and branches whose rank window contains ``rank``.

    The lookup filters the module-level, label-encoded ``df`` for rows that
    match the given category, gender and region and whose
    "Opening Rank" <= rank <= "Closing Rank".

    Args:
        category: Category label, in the form the "Category" encoder was
            fitted on.
        gender: Gender label, in the form the "Gender" encoder was fitted on.
        rank: Non-negative ``int`` or ``float`` rank.
        region: Region label, in the form the "Region" encoder was fitted on.

    Returns:
        A DataFrame with the de-duplicated, decoded "College Name" and
        "Branch" columns of every matching row. A message string is returned
        instead when the rank is invalid, when a label is unknown to its
        encoder, or when no row matches.
    """
    # NOTE(review): the error paths return plain strings, but the Gradio
    # output component is a Dataframe. Confirm how the installed Gradio
    # version renders a str there; it may not display it as a message.
    invalid_rank = (
        "Invalid Rank: Please enter a valid positive integer "
        "without symbols or negative values."
    )

    # Reject non-numbers, negatives, and floats whose text form carries a
    # minus sign (for example -0.0 or 1e-05).
    if not isinstance(rank, (int, float)) or rank < 0 or "-" in str(rank):
        return invalid_rank
    try:
        int(rank)
    except (ValueError, OverflowError):
        # NaN and infinity are floats that pass the checks above but cannot
        # be converted to an integer; report them instead of crashing.
        return invalid_rank

    # Load label encoders
    label_encoders = joblib.load("label_encoders.pkl")

    # Translate the human-readable labels into the integer codes used in df.
    try:
        category_enc = label_encoders["Category"].transform([category])[0]
        gender_enc = label_encoders["Gender"].transform([gender])[0]
        region_enc = label_encoders["Region"].transform([region])[0]
    except ValueError:
        return "Invalid input values. Please select valid options."

    matches = (
        (df["Category"] == category_enc)
        & (df["Gender"] == gender_enc)
        & (df["Opening Rank"] <= rank)
        & (df["Closing Rank"] >= rank)
        & (df["Region"] == region_enc)
    )

    # Take an explicit copy so the decoded labels are written to a private
    # frame rather than to a slice of the shared df.
    selected = df.loc[matches, ["College Name", "Branch"]].copy()
    if selected.empty:
        return "No matching colleges found."

    # Decode college names and branches back to their original labels.
    selected["College Name"] = label_encoders["College Name"].inverse_transform(
        selected["College Name"].values
    )
    selected["Branch"] = label_encoders["Branch"].inverse_transform(
        selected["Branch"].values
    )
    return selected.drop_duplicates()
| # Gradio Interface | |
# Input widgets are listed in the same order as predict_colleges' parameters:
# category, gender, rank, region.
demo = gr.Interface(
    fn=predict_colleges,
    inputs=[
        gr.Dropdown(label="Category", choices=["OC", "BC", "SC", "ST"]),
        gr.Radio(label="Gender", choices=["Male", "Female"]),
        # precision=0 restricts the field to whole numbers only.
        gr.Number(label="Rank", precision=0),
        gr.Dropdown(label="Region", choices=["AU", "SV"]),
    ],
    outputs=gr.Dataframe(headers=["College Name", "Branch"]),
    title="AP EAPCET College Predictor",
    description="Enter your details to predict all possible colleges and branches based on your rank.",
    theme="soft",
    # Hide the default Gradio footer.
    css=".gradio-container .footer {display: none !important;}",
)

demo.launch()