Spaces:

breadlicker45
/

multilingual-bert-gender-classification-demo

Sleeping

App Files Community

breadlicker45 committed on Jul 2

Commit

65c4e1b

verified ·

1 Parent(s): d8982cb

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -31

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import os
@@ -10,20 +11,21 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN is None:
     raise ValueError("Hugging Face token not found. Please set the HF_TOKEN secret in your Space settings.")
-# The ID of your private model on the Hub
 MODEL_ID = "breadlicker45/bilingual-large-gender-v4-test"
-# Set up device (use GPU if available, otherwise CPU)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 print(f"Loading model: {MODEL_ID}...")
 try:
-    # Load tokenizer and model, providing the token for private access
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN, trust_remote_code=True)
-    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, token=HF_TOKEN,trust_remote_code=True, num_labels=3)
-    # Move the model to the selected device ONCE for efficiency
     model.to(device)
     print("Model loaded successfully!")
@@ -32,38 +34,19 @@ except Exception as e:
     raise e
 # --- 2. Define the Manual Prediction Function ---
 def classify_gender(text: str) -> dict:
-    """
-    Performs manual inference on the input text and returns a dictionary
-    of label probabilities suitable for Gradio's Label component.
-    """
     if not text or not text.strip():
-        return None  # Handle empty input
-    # 1. Tokenize the input text
-    # The tokenizer prepares the text in the format the model expects.
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
-    # 2. Move tokenized inputs to the same device as the model
     inputs = {k: v.to(device) for k, v in inputs.items()}
-    # 3. Get model predictions
-    # `torch.no_grad()` is used for inference to disable gradient calculations,
-    # which saves memory and speeds up computation.
     with torch.no_grad():
         logits = model(**inputs).logits
-    # 4. Convert logits to probabilities
-    # The softmax function converts the raw output scores (logits) into a
-    # probability distribution across all labels.
     probabilities = torch.nn.functional.softmax(logits, dim=-1)
-    # 5. Format the output for Gradio's Label component
-    # We create a dictionary mapping each label name to its probability score.
-    # `model.config.id2label` provides the mapping from class index to label name
-    # e.g., {0: 'female', 1: 'male', 2: 'neutral'}
-    scores = probabilities.squeeze().tolist() # Convert tensor to a simple list
     results = {}
     for i, score in enumerate(scores):
@@ -73,7 +56,7 @@ def classify_gender(text: str) -> dict:
     return results
 # --- 3. Create the Gradio Interface ---
-# (This part remains the same as it correctly displays the dictionary output)
 DESCRIPTION = """
 ## Bilingual Gender Classifier

 import gradio as gr
+# BE EXPLICIT: Import the specific model class we need
+from transformers import AutoTokenizer, XLMRobertaForSequenceClassification
 import torch
 import os
 if HF_TOKEN is None:
     raise ValueError("Hugging Face token not found. Please set the HF_TOKEN secret in your Space settings.")
 MODEL_ID = "breadlicker45/bilingual-large-gender-v4-test"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 print(f"Loading model: {MODEL_ID}...")
 try:
+    # Tokenizer can still be loaded automatically
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
+    # THE FIX: Use the explicit class instead of AutoModelForSequenceClassification.
+    # This ignores the problematic 'auto_map' in config.json and forces the
+    # use of the standard XLM-RoBERTa architecture for sequence classification.
+    model = XLMRobertaForSequenceClassification.from_pretrained(MODEL_ID, token=HF_TOKEN)
+    # Move the model to the selected device
     model.to(device)
     print("Model loaded successfully!")
     raise e
 # --- 2. Define the Manual Prediction Function ---
+# (This function is already correct and does not need changes)
 def classify_gender(text: str) -> dict:
     if not text or not text.strip():
+        return None
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
     inputs = {k: v.to(device) for k, v in inputs.items()}
     with torch.no_grad():
         logits = model(**inputs).logits
     probabilities = torch.nn.functional.softmax(logits, dim=-1)
+    scores = probabilities.squeeze().tolist()
     results = {}
     for i, score in enumerate(scores):
     return results
 # --- 3. Create the Gradio Interface ---
+# (This part remains the same)
 DESCRIPTION = """
 ## Bilingual Gender Classifier