ashtii committed on
Commit
6681f0f
·
verified ·
1 Parent(s): 1935def

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -64
app.py CHANGED
@@ -1,87 +1,110 @@
 
1
  import gradio as gr
2
- import joblib
3
- import os
4
- import requests
5
 
6
# -----------------------------
# 1) Download model + vectorizers directly (NOT git clone)
# -----------------------------
base_url = "https://huggingface.co/ashtii/cosmetic-category-model/resolve/main/"

files_to_download = [
    "model.joblib",
    "char_vect.joblib",
    "word_vect.joblib",
    "vect_f.joblib",
    "char_vect_cat.joblib",
]

os.makedirs("modelrepo", exist_ok=True)

for file in files_to_download:
    url = base_url + file
    save_path = f"modelrepo/{file}"
    try:
        # A timeout keeps a stalled connection from hanging startup forever.
        r = requests.get(url, timeout=20)
        if r.status_code == 200:
            with open(save_path, "wb") as f:
                f.write(r.content)
            print(f"Downloaded {file}")
        else:
            print(f"Skipping {file} (not found)")
    except (requests.RequestException, OSError) as e:
        # Downloads stay best-effort, but only network and filesystem errors
        # are absorbed, and the cause is reported instead of being hidden.
        print(f"Error downloading {file}: {e}")

# -----------------------------
# 2) Load model
# -----------------------------
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts from a trusted source.
model = joblib.load("modelrepo/model.joblib")
 
 
39
 
40
- # -----------------------------
41
- # 3) Optional vectorizers
42
- # -----------------------------
43
def load_optional(path):
    """Load an optional joblib artifact, returning ``None`` when unavailable.

    Args:
        path: Filesystem path of the ``.joblib`` file to load.

    Returns:
        The object returned by ``joblib.load(path)``, or ``None`` if the file
        does not exist or loading raises an ``Exception``.
    """
    # A missing file is the expected "optional" case, so stay quiet about it.
    if not os.path.isfile(path):
        return None
    try:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code; only load artifacts from a trusted source.
        return joblib.load(path)
    except Exception as e:
        # Still best-effort, but no longer a bare ``except:`` (which would
        # also swallow KeyboardInterrupt/SystemExit), and the failure is
        # reported so a corrupt file is not mistaken for an absent one.
        print(f"Could not load {path}: {e}")
        return None
48
 
49
# Each of these is None when the corresponding file could not be loaded.
char_vect = load_optional("modelrepo/char_vect.joblib")
word_vect = load_optional("modelrepo/word_vect.joblib")
vect_f = load_optional("modelrepo/vect_f.joblib")
char_vect_cat = load_optional("modelrepo/char_vect_cat.joblib")

# Use the first vectorizer that actually loaded. The check is an explicit
# "is not None" rather than truthiness, so a loaded object that happens to
# evaluate as falsy (e.g. one defining __len__) is not skipped by accident.
vectorizer = next(
    (v for v in (char_vect, word_vect, vect_f, char_vect_cat) if v is not None),
    None,
)

print("Active vectorizer:", type(vectorizer))
 
 
 
 
 
 
57
 
58
# -----------------------------
# 4) Prediction function
# -----------------------------
def predict(text):
    """Predict a category for one ingredients string.

    Args:
        text: The raw text entered by the user.

    Returns:
        ``{"prediction": [...]}`` with the model output converted to a list,
        or ``{"error": "<message>"}`` if anything raised along the way.
    """
    try:
        X = [text]

        # Explicit None check: the selected vectorizer is either a loaded
        # object or None, and truthiness of an arbitrary object is not a
        # reliable stand-in for "was loaded".
        if vectorizer is not None:
            X_vec = vectorizer.transform(X)
            pred = model.predict(X_vec).tolist()
        else:
            # No vectorizer available: hand the raw text to the model.
            pred = model.predict(X).tolist()

        return {"prediction": pred}

    except Exception as e:
        # Surface the failure to the UI as JSON instead of crashing the app.
        return {"error": str(e)}
75
 
76
# -----------------------------
# 5) Gradio Interface
# -----------------------------
# One text box in, JSON out; `predict` does all the work.
api = gr.Interface(
    title="Cosmetic Category Classifier",
    description="Enter ingredients and get a predicted product category.",
    fn=predict,
    inputs=gr.Textbox(label="Ingredients text"),
    outputs="json",
)

api.launch()
 
1
+ # app.py — attempt to reconstruct training features by stacking available vectorizers
2
  import gradio as gr
3
+ import joblib, os, requests
4
+ import numpy as np
5
+ from scipy.sparse import hstack, csr_matrix
6
 
7
# download model files directly from HF repo
BASE = "https://huggingface.co/ashtii/cosmetic-category-model/resolve/main/"
FILES = [
    "model.joblib",
    "char_vect.joblib",
    "word_vect.joblib",
    "vect_f.joblib",
    "char_vect_cat.joblib",
    "word_vect_cat.joblib",
]

os.makedirs("modelrepo", exist_ok=True)
for f in FILES:
    url = BASE + f
    try:
        r = requests.get(url, timeout=20)
        if r.status_code == 200:
            with open(os.path.join("modelrepo", f), "wb") as fh:
                fh.write(r.content)
            print("Downloaded", f)
        else:
            # Report non-200 responses too, so a file that is absent from the
            # repo is visible in the logs instead of vanishing silently.
            print("skip", f, "HTTP", r.status_code)
    except Exception as e:
        # Best-effort: any failure for one file must not stop the others.
        print("skip", f, e)

# load model
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts from a trusted source.
model = joblib.load("modelrepo/model.joblib")
# None when the model does not expose n_features_in_.
EXPECTED_DIM = getattr(model, "n_features_in_", None)
print("Model expects features:", EXPECTED_DIM)
29
 
30
+ # helper to load optional vectorizers
31
def opt_load(path):
    """Load an optional joblib artifact, returning ``None`` when unavailable.

    Args:
        path: Filesystem path of the ``.joblib`` file to load.

    Returns:
        The object returned by ``joblib.load(path)``, or ``None`` if the file
        does not exist or loading raises an ``Exception``.
    """
    # A missing file is the expected "optional" case, so stay quiet about it.
    if not os.path.isfile(path):
        return None
    try:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code; only load artifacts from a trusted source.
        return joblib.load(path)
    except Exception as e:
        # Still best-effort, but report the failure so a corrupt or
        # incompatible file is not mistaken for an absent one.
        print("failed to load", path, e)
        return None
36
 
37
# load vectorizers that exist
# Derived from FILES so the download list and the load list cannot drift
# apart; everything except the model itself is treated as a vectorizer.
# Order is preserved because features are later stacked in load order.
vec_names = [n for n in FILES if n != "model.joblib"]
vecs = []
for name in vec_names:
    p = os.path.join("modelrepo", name)
    v = opt_load(p)
    if v is not None:
        # Keep the file name alongside the object for later log messages.
        vecs.append((name, v))
        print("Loaded vectorizer:", name, type(v))
46
 
47
+ # Function to build combined features
48
_UNSET = object()  # sentinel meaning "use the module-level EXPECTED_DIM"


def build_features(text, *, vectorizers=None, expected_dim=_UNSET):
    """Build the stacked sparse feature matrix for one or more documents.

    Every vectorizer is applied to the input and the results are stacked
    column-wise in the order given. If a target width is known, the matrix is
    zero-padded on the right or trimmed so it has exactly that many columns.

    Args:
        text: A string (one document), a list/tuple of documents, or any
            other object, which is converted with ``str()``.
        vectorizers: Optional iterable of ``(name, vectorizer)`` pairs whose
            vectorizers expose ``transform``. Defaults to the module-level
            ``vecs``.
        expected_dim: Optional target column count, or ``None`` to skip the
            pad/trim step. Defaults to the module-level ``EXPECTED_DIM``.

    Returns:
        A CSR matrix with one row per document, or ``None`` when no
        vectorizer produced output.
    """
    if vectorizers is None:
        vectorizers = vecs
    if expected_dim is _UNSET:
        expected_dim = EXPECTED_DIM

    # Normalise the input to a list of documents.
    if isinstance(text, str):
        docs = [text]
    elif isinstance(text, (list, tuple)):
        docs = list(text)
    else:
        docs = [str(text)]

    mats = []
    for name, vect in vectorizers:
        try:
            # Converting here (sparse or dense alike) means the later hstack
            # only ever sees CSR blocks and needs no fallback path.
            mats.append(csr_matrix(vect.transform(docs)))
        except Exception as e:
            # Best-effort: skip this vectorizer. NOTE: skipping shifts the
            # columns of every later block, so predictions may be unreliable.
            print("transform failed for", name, e)

    if not mats:
        # Nothing to stack; the caller decides how to report this.
        return None

    # Stack the blocks in the same order the vectorizers were supplied.
    X_comb = hstack(mats).tocsr()

    if expected_dim is not None:
        cur = X_comb.shape[1]
        if cur != expected_dim:
            # Padding/trimming is a best-effort shape fix only; make the
            # mismatch visible instead of adjusting silently.
            print("feature width", cur, "!= expected", expected_dim, "- adjusting")
        if cur < expected_dim:
            # Pad with all-zero columns on the right.
            pad = csr_matrix((X_comb.shape[0], expected_dim - cur), dtype=X_comb.dtype)
            X_comb = hstack([X_comb, pad]).tocsr()
        elif cur > expected_dim:
            # Drop the extra trailing columns.
            X_comb = X_comb[:, :expected_dim]

    return X_comb
90
+
91
# prediction function
def predict(text):
    """Run the model on the given text and return a JSON-friendly result.

    Args:
        text: The raw input passed through to ``build_features``.

    Returns:
        ``{"prediction": [...]}`` holding the output of ``predict_proba``
        when the model has that method and of ``predict`` otherwise, or
        ``{"error": "<message>"}`` when features cannot be built or anything
        raises.
    """
    try:
        features = build_features(text)
        if features is None:
            return {"error": "No vectorizers available; cannot build features."}

        # Prefer probabilities when the model exposes them, labels otherwise.
        scorer = model.predict_proba if hasattr(model, "predict_proba") else model.predict
        return {"prediction": scorer(features).tolist()}
    except Exception as e:
        # Report the failure as JSON instead of letting it propagate.
        return {"error": str(e)}
106
 
107
# Gradio interface
# A two-line text box feeds `predict`; the result is rendered as JSON.
iface = gr.Interface(
    title="Cosmetic Category Classifier",
    fn=predict,
    inputs=gr.Textbox(lines=2, placeholder="Aqua, glycerin, ..."),
    outputs="json",
)
iface.launch()