Spaces:

ashtii
/

cosmetic-category-api2

Runtime error

App Files Files Community

ashtii committed on Dec 1, 2025

Commit

6681f0f

verified ·

1 Parent(s): 1935def

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -64

app.py CHANGED Viewed

@@ -1,87 +1,110 @@
 import gradio as gr
-import joblib
-import os
-import requests
-# -----------------------------
-# 1) Download model + vectorizers directly (NOT git clone)
-# -----------------------------
-base_url = "https://huggingface.co/ashtii/cosmetic-category-model/resolve/main/"
-files_to_download = [
-    "model.joblib",
-    "char_vect.joblib",
-    "word_vect.joblib",
-    "vect_f.joblib",
-    "char_vect_cat.joblib"
-]
 os.makedirs("modelrepo", exist_ok=True)
-for file in files_to_download:
-    url = base_url + file
-    save_path = f"modelrepo/{file}"
     try:
-        r = requests.get(url)
         if r.status_code == 200:
-            with open(save_path, "wb") as f:
-                f.write(r.content)
-            print(f"Downloaded {file}")
-        else:
-            print(f"Skipping {file} (not found)")
-    except:
-        print(f"Error downloading {file}")
-# -----------------------------
-# 2) Load model
-# -----------------------------
 model = joblib.load("modelrepo/model.joblib")
-# -----------------------------
-# 3) Optional vectorizers
-# -----------------------------
-def load_optional(path):
     try:
         return joblib.load(path)
-    except:
         return None
-char_vect = load_optional("modelrepo/char_vect.joblib")
-word_vect = load_optional("modelrepo/word_vect.joblib")
-vect_f   = load_optional("modelrepo/vect_f.joblib")
-char_vect_cat = load_optional("modelrepo/char_vect_cat.joblib")
-vectorizer = char_vect or word_vect or vect_f or char_vect_cat
-print("Active vectorizer:", type(vectorizer))
-# -----------------------------
-# 4) Prediction function
-# -----------------------------
 def predict(text):
     try:
-        X = [text]
-        if vectorizer:
-            X_vec = vectorizer.transform(X)
-            pred = model.predict(X_vec).tolist()
         else:
-            pred = model.predict(X).tolist()
-        return {"prediction": pred}
     except Exception as e:
         return {"error": str(e)}
-# -----------------------------
-# 5) Gradio Interface
-# -----------------------------
-api = gr.Interface(
-    fn=predict,
-    inputs=gr.Textbox(label="Ingredients text"),
-    outputs="json",
-    title="Cosmetic Category Classifier",
-    description="Enter ingredients and get a predicted product category."
-)
-api.launch()

+# app.py — attempt to reconstruct training features by stacking available vectorizers
 import gradio as gr
+import joblib, os, requests
+import numpy as np
+from scipy.sparse import hstack, csr_matrix
+# download model files directly from HF repo
+BASE = "https://huggingface.co/ashtii/cosmetic-category-model/resolve/main/"
+FILES = ["model.joblib",
+         "char_vect.joblib","word_vect.joblib","vect_f.joblib",
+         "char_vect_cat.joblib","word_vect_cat.joblib"]
 os.makedirs("modelrepo", exist_ok=True)
+# Best-effort download of every artifact; a failure for one file must not stop the rest.
+for f in FILES:
+    url = BASE + f
     try:
+        r = requests.get(url, timeout=20)
         if r.status_code == 200:
+            with open(os.path.join("modelrepo", f), "wb") as fh:
+                fh.write(r.content)
+            print("Downloaded", f)
+        else:
+            # Optional vectorizers may be absent from the repo, but report the
+            # status so a missing model.joblib is visible in the logs before
+            # the later joblib.load call fails.
+            print("skip", f, "HTTP", r.status_code)
+    except Exception as e:
+        print("skip", f, e)
+# load model
 model = joblib.load("modelrepo/model.joblib")
+EXPECTED_DIM = getattr(model, "n_features_in_", None)
+print("Model expects features:", EXPECTED_DIM)
+# helper to load optional vectorizers
+def opt_load(path):
+    """Load an optional joblib artifact from `path`.
+
+    Returns the loaded object, or None when the file is missing or cannot be
+    loaded. A missing file is expected and stays silent; any other failure is
+    printed so a corrupt or incompatible artifact can be told apart from an
+    absent one.
+
+    NOTE(security): joblib.load unpickles the file, which can execute
+    arbitrary code. Only load artifacts from a repository you trust.
+    """
     try:
         return joblib.load(path)
+    except FileNotFoundError:
+        # The artifact was not downloaded; callers treat None as "not available".
         return None
+    except Exception as e:
+        print("failed to load", path, e)
+        return None
+# load vectorizers that exist
+# Every downloaded artifact except the model itself is a candidate vectorizer.
+# Deriving the names from FILES keeps the download list and the load list in
+# sync, and preserves the stacking order used by build_features.
+vec_names = [name for name in FILES if name != "model.joblib"]
+vecs = []
+for name in vec_names:
+    v = opt_load(os.path.join("modelrepo", name))
+    if v is not None:
+        vecs.append((name, v))
+        print("Loaded vectorizer:", name, type(v))
+# Function to build combined features
+def _as_documents(text):
+    """Normalize the input into a list of documents for vectorizer.transform."""
+    if isinstance(text, str):
+        return [text]
+    if isinstance(text, (list, tuple)):
+        return list(text)
+    return [str(text)]
+def _fit_width(X, width):
+    """Zero-pad on the right, or truncate, the columns of sparse X to `width`."""
+    cur = X.shape[1]
+    if cur == width:
+        return X
+    # A mismatch means the stacked vectorizers do not reproduce the training
+    # features; log it, because predictions made after padding/trimming are
+    # best-effort only.
+    print("feature width mismatch: built", cur, "but model expects", width)
+    if cur < width:
+        pad = csr_matrix((X.shape[0], width - cur), dtype=X.dtype)
+        return hstack([X, pad]).tocsr()
+    return X[:, :width]
+def build_features(text):
+    """Build the combined feature matrix for `text`.
+
+    `text` may be a string, a list/tuple of strings, or any other object
+    (which is converted with str()). Each loaded vectorizer in `vecs` is
+    applied in load order and the results are stacked column-wise. When the
+    model reports `n_features_in_` (EXPECTED_DIM), the result is padded with
+    zero columns or truncated to that width.
+
+    Returns a CSR matrix, or None when no vectorizer produced features.
+    A vectorizer whose transform raises is logged and skipped, which shifts
+    the columns of the ones after it.
+    """
+    X_in = _as_documents(text)
+    mats = []
+    for name, v in vecs:
+        try:
+            # csr_matrix() accepts sparse and dense results alike, so every
+            # block is sparse before stacking and one hstack call suffices.
+            mats.append(csr_matrix(v.transform(X_in)))
+        except Exception as e:
+            print("transform failed for", name, e)
+    if not mats:
+        return None
+    X_comb = hstack(mats).tocsr()
+    if EXPECTED_DIM is not None:
+        X_comb = _fit_width(X_comb, EXPECTED_DIM)
+    return X_comb
+# prediction function
 def predict(text):
+    """Classify `text` and return a JSON-serializable dict.
+
+    On success the dict has "prediction": per-class probability rows when the
+    model has predict_proba, otherwise predicted labels. With probabilities,
+    "classes" is also included when the model exposes `classes_`, so each
+    probability column can be matched to its category. On any failure the
+    dict is {"error": <message>}.
+    """
     try:
+        X = build_features(text)
+        if X is None:
+            return {"error": "No vectorizers available; cannot build features."}
+        result = {}
+        if hasattr(model, "predict_proba"):
+            result["prediction"] = model.predict_proba(X).tolist()
+            # Probabilities are meaningless without the label for each column.
+            classes = getattr(model, "classes_", None)
+            if classes is not None:
+                result["classes"] = np.asarray(classes).tolist()
         else:
+            result["prediction"] = model.predict(X).tolist()
+        return result
     except Exception as e:
         return {"error": str(e)}
+# Gradio interface
+iface = gr.Interface(fn=predict, inputs=gr.Textbox(lines=2, placeholder="Aqua, glycerin, ..."), outputs="json",
+                     title="Cosmetic Category Classifier")
+iface.launch()