Spaces:

breadlicker45
/

multilingual-bert-gender-classification-demo

Sleeping

App Files Files Community

breadlicker45 committed on Jul 2

Commit

eb1d7b9

verified ·

1 Parent(s): b354cd2

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -40

app.py CHANGED Viewed

@@ -1,69 +1,79 @@
 import gradio as gr
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 import os
-# --- 1. Load Model from Hugging Face Hub ---
 # Get the Hugging Face token from the Space's secrets
-# This is crucial for accessing a private model
 HF_TOKEN = os.getenv("HF_TOKEN")
-# Ensure the token is set
 if HF_TOKEN is None:
-    raise ValueError(
-        "Hugging Face token not found. Please set the HF_TOKEN secret in your Space settings."
-    )
 # The ID of your private model on the Hub
 MODEL_ID = "breadlicker45/bilingual-large-gender-v4-test"
 print(f"Loading model: {MODEL_ID}...")
 try:
-    # Explicitly load tokenizer and model to pass the token and trust_remote_code
-    # trust_remote_code=True is needed for models with custom architectures/code
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
-    model = AutoModelForSequenceClassification.from_pretrained(
-        MODEL_ID,
-        token=HF_TOKEN,
-        trust_remote_code=True # IMPORTANT for custom models
-    )
-    # Create the pipeline using the pre-loaded model and tokenizer
-    classifier = pipeline(
-        "text-classification",
-        model=model,
-        tokenizer=tokenizer
-    )
     print("Model loaded successfully!")
 except Exception as e:
-    # Provide a helpful error message if loading fails
     print(f"Error loading model: {e}")
-    # You can display this error in the Gradio UI as well if you want
-    # For now, we'll just let the Space crash with a clear log message.
     raise e
-# --- 2. Define the Prediction Function ---
 def classify_gender(text: str) -> dict:
     """
-    Takes a string of text and returns the model's predictions
-    in a format that Gradio's Label component can display.
     """
     if not text or not text.strip():
-        # Handle empty or whitespace-only input gracefully
-        return None
-    # The pipeline will run the text through the model
-    # top_k=3 ensures we get scores for all 3 labels
-    predictions = classifier(text, top_k=3)
-    # Format the predictions into a {label: confidence} dictionary for the gr.Label component
-    formatted_predictions = {p['label']: p['score'] for p in predictions}
-    return formatted_predictions
 # --- 3. Create the Gradio Interface ---
 DESCRIPTION = """
 ## Bilingual Gender Classifier
@@ -79,7 +89,6 @@ ARTICLE = """
 </div>
 """
-# Define some examples for users to try
 examples = [
     ["He went to the store to buy a new hammer."],
     ["La doctora le recetó un medicamento a su paciente."],
@@ -121,10 +130,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         fn=classify_gender,
         inputs=text_input,
         outputs=output_label,
-        api_name="classify" # You can add an API name for programmatic access
     )
 # --- 4. Launch the App ---
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
 import os
+# --- 1. Setup: Load Model and Define Device ---
 # Get the Hugging Face token from the Space's secrets
 HF_TOKEN = os.getenv("HF_TOKEN")
 # Reject an empty secret as well as a missing one: an empty string would
 # otherwise pass this check and only fail later, inside the Hub request.
 if not HF_TOKEN:
     raise ValueError("Hugging Face token not found. Please set the HF_TOKEN secret in your Space settings.")
 # The ID of your private model on the Hub
 MODEL_ID = "breadlicker45/bilingual-large-gender-v4-test"
 # Set up device (use GPU if available, otherwise CPU)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 print(f"Loading model: {MODEL_ID}...")
 try:
     # Load tokenizer and model, providing the token for private access
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
     model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, token=HF_TOKEN)
     # Move the model to the selected device once, at startup, so that
     # classify_gender only has to move the (small) input tensors per request.
     model.to(device)
     print("Model loaded successfully!")
 except Exception as e:
     # Log a readable message, then let the original exception propagate
     # unchanged so the Space fails fast with the full traceback.
     print(f"Error loading model: {e}")
     raise
+# --- 2. Define the Manual Prediction Function ---
 def classify_gender(text: str) -> dict:
     """
     Run manual inference on ``text`` and return per-label probabilities.

     Args:
         text: The input string to classify.

     Returns:
         A ``{label_name: probability}`` dictionary suitable for Gradio's
         Label component, or ``None`` when ``text`` is empty or contains
         only whitespace.
     """
     if not text or not text.strip():
         return None  # Handle empty input
     # 1. Tokenize the input text (anything beyond 512 tokens is truncated).
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
     # 2. Move tokenized inputs to the same device as the model
     inputs = {k: v.to(device) for k, v in inputs.items()}
     # 3. Get model predictions; no_grad() disables gradient tracking, which
     #    is not needed for inference.
     with torch.no_grad():
         logits = model(**inputs).logits
     # 4. Convert logits to a probability distribution over the labels
     probabilities = torch.nn.functional.softmax(logits, dim=-1)
     # 5. Take the single batch row explicitly instead of calling squeeze():
     #    squeeze() would also drop the label dimension when the model has
     #    only one label, turning tolist() into a bare float and breaking the
     #    enumeration below.
     #    NOTE(review): assumes logits are (batch=1, num_labels) - confirm.
     scores = probabilities[0].tolist()
     # `model.config.id2label` maps each class index to its label name.
     id2label = model.config.id2label
     return {id2label[i]: score for i, score in enumerate(scores)}
 # --- 3. Create the Gradio Interface ---
+# (This part remains the same as it correctly displays the dictionary output)
 DESCRIPTION = """
 ## Bilingual Gender Classifier
 </div>
 """
 examples = [
     ["He went to the store to buy a new hammer."],
     ["La doctora le recetó un medicamento a su paciente."],
         fn=classify_gender,
         inputs=text_input,
         outputs=output_label,
+        api_name="classify"
     )
 # --- 4. Launch the App ---
 if __name__ == "__main__":
     demo.launch()