harikrishna1985 committed on
Commit
881d19a
·
verified ·
1 Parent(s): 893f42c

Upload src/predict.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/predict.py +46 -11
src/predict.py CHANGED
@@ -1,40 +1,75 @@
1
- from huggingface_hub import hf_hub_download
2
- from src.preprocess import preprocess_input
3
  import joblib
4
  import pandas as pd
5
  import yaml
6
 
 
 
 
7
 
8
  def load_config(config_path: str = "config/config.yaml") -> dict:
9
  with open(config_path, "r", encoding="utf-8") as f:
10
  return yaml.safe_load(f)
11
 
12
 
13
- def load_model():
14
  config = load_config()
 
15
  repo_id = config["model"]["repo_id"]
16
- filename = config["model"]["filename"]
 
17
 
18
  model_path = hf_hub_download(
19
  repo_id=repo_id,
20
- filename=filename
 
 
 
 
 
 
 
21
  )
 
22
  model = joblib.load(model_path)
23
- return model
24
 
 
 
 
 
 
 
 
 
 
 
25
 
 
 
26
 
27
- def predict_input(input_df):
28
- model = load_model()
 
 
 
 
 
 
29
 
30
  processed_df = preprocess_input(input_df)
31
 
32
- prediction = model.predict(processed_df)
 
 
 
33
 
34
- result = {"prediction": prediction[0]}
 
 
 
35
 
36
  if hasattr(model, "predict_proba"):
37
- proba = model.predict_proba(processed_df)
38
  result["probabilities"] = proba[0].tolist()
39
 
40
  return result
 
1
+ import json
 
2
  import joblib
3
  import pandas as pd
4
  import yaml
5
 
6
+ from huggingface_hub import hf_hub_download
7
+ from src.preprocess import preprocess_input
8
+
9
 
10
def load_config(config_path: str = "config/config.yaml") -> dict:
    """Load the project's YAML configuration file.

    Args:
        config_path: Path of the YAML file to read. Defaults to
            ``config/config.yaml``.

    Returns:
        The parsed configuration. An empty file yields an empty dict.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    with open(config_path, "r", encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # safe_load returns None for an empty document; hand back an empty dict so
    # the declared return type holds and callers get a KeyError naming the
    # missing section instead of "'NoneType' object is not subscriptable".
    return {} if config is None else config
13
 
14
 
15
def load_model_and_info():
    """Fetch the trained model and its metadata file from the Hugging Face Hub.

    The repository id and the two file names are read from the ``model``
    section of the project configuration (``repo_id``, ``filename`` and
    ``info_filename``).

    Returns:
        A ``(model, model_info)`` tuple: the object deserialized by joblib
        and the parsed content of the JSON metadata file.

    Raises:
        KeyError: If a required key is missing from the configuration.
    """
    config = load_config()

    repo_id = config["model"]["repo_id"]
    model_filename = config["model"]["filename"]
    info_filename = config["model"]["info_filename"]

    # Download the model artifact first, then its metadata, from the same repo.
    model_path, info_path = (
        hf_hub_download(repo_id=repo_id, filename=name, repo_type="model")
        for name in (model_filename, info_filename)
    )

    # NOTE(review): joblib.load unpickles the downloaded artifact, which can
    # execute arbitrary code -- only point repo_id at a repository you trust.
    model = joblib.load(model_path)

    with open(info_path, "r", encoding="utf-8") as info_file:
        model_info = json.load(info_file)

    return model, model_info
40
+
41
+
42
def align_features_for_inference(input_df: pd.DataFrame, feature_columns: list[str]) -> pd.DataFrame:
    """Reshape an input frame into the column layout given by *feature_columns*.

    Column labels are normalized (stripped, lower-cased, spaces replaced by
    underscores), categorical columns are one-hot encoded, and the result is
    reindexed to ``feature_columns``: columns not listed there are dropped and
    listed columns that are absent are added and filled with ``0``.

    Args:
        input_df: Frame to align. It is copied, not modified.
        feature_columns: Column names, in order, that the output must have.
            They are matched as given and are not normalized here.

    Returns:
        A new DataFrame whose columns are exactly ``feature_columns``.
    """
    df = input_df.copy()

    # Column labels are not guaranteed to be strings (e.g. integer labels on a
    # header-less frame), so coerce to str before applying string methods.
    df.columns = [str(col).strip().lower().replace(" ", "_") for col in df.columns]

    # Apply one-hot encoding in case categoricals are introduced later.
    df = pd.get_dummies(df, drop_first=False)

    # Align to the exact training feature set.
    return df.reindex(columns=feature_columns, fill_value=0)
54
+
55
+
56
def predict_input(input_df: pd.DataFrame) -> dict:
    """Run the hosted model on *input_df* and report the first row's result.

    The model and its metadata are loaded, the input is preprocessed and
    aligned to the ``feature_columns`` listed in the metadata, and the model
    is asked for a prediction.

    Args:
        input_df: Raw input frame; only the first row is reported.

    Returns:
        A dict with:
            ``prediction``: the model output for the first row.
            ``processed_input``: the first aligned row as a column->value dict.
            ``probabilities``: per-class probabilities for the first row,
                present only when the model exposes ``predict_proba``.
    """
    model, model_info = load_model_and_info()

    processed_df = preprocess_input(input_df)

    feature_columns = model_info["feature_columns"]
    aligned_df = align_features_for_inference(processed_df, feature_columns)

    first_prediction = model.predict(aligned_df)[0]
    # NumPy scalars/arrays are not JSON serializable; unwrap to native Python
    # values the same way the probabilities are unwrapped below.
    if hasattr(first_prediction, "tolist"):
        first_prediction = first_prediction.tolist()

    result = {
        "prediction": first_prediction,
        "processed_input": aligned_df.to_dict(orient="records")[0],
    }

    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(aligned_df)
        result["probabilities"] = proba[0].tolist()

    return result