breadlicker45 committed on
Commit
eb1d7b9
·
verified ·
1 Parent(s): b354cd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -40
app.py CHANGED
@@ -1,69 +1,79 @@
1
  import gradio as gr
2
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 
3
  import os
4
 
5
- # --- 1. Load Model from Hugging Face Hub ---
6
 
7
  # Get the Hugging Face token from the Space's secrets
8
- # This is crucial for accessing a private model
9
  HF_TOKEN = os.getenv("HF_TOKEN")
10
-
11
- # Ensure the token is set
12
  if HF_TOKEN is None:
13
- raise ValueError(
14
- "Hugging Face token not found. Please set the HF_TOKEN secret in your Space settings."
15
- )
16
 
17
  # The ID of your private model on the Hub
18
  MODEL_ID = "breadlicker45/bilingual-large-gender-v4-test"
19
 
 
 
 
 
20
  print(f"Loading model: {MODEL_ID}...")
21
  try:
22
- # Explicitly load tokenizer and model to pass the token and trust_remote_code
23
- # trust_remote_code=True is needed for models with custom architectures/code
24
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
 
25
 
26
- model = AutoModelForSequenceClassification.from_pretrained(
27
- MODEL_ID,
28
- token=HF_TOKEN,
29
- trust_remote_code=True # IMPORTANT for custom models
30
- )
31
-
32
- # Create the pipeline using the pre-loaded model and tokenizer
33
- classifier = pipeline(
34
- "text-classification",
35
- model=model,
36
- tokenizer=tokenizer
37
- )
38
  print("Model loaded successfully!")
39
 
40
  except Exception as e:
41
- # Provide a helpful error message if loading fails
42
  print(f"Error loading model: {e}")
43
- # You can display this error in the Gradio UI as well if you want
44
- # For now, we'll just let the Space crash with a clear log message.
45
  raise e
46
 
47
- # --- 2. Define the Prediction Function ---
48
 
49
  def classify_gender(text: str) -> dict:
50
  """
51
- Takes a string of text and returns the model's predictions
52
- in a format that Gradio's Label component can display.
53
  """
54
  if not text or not text.strip():
55
- # Handle empty or whitespace-only input gracefully
56
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- # The pipeline will run the text through the model
59
- # top_k=3 ensures we get scores for all 3 labels
60
- predictions = classifier(text, top_k=3)
61
-
62
- # Format the predictions into a {label: confidence} dictionary for the gr.Label component
63
- formatted_predictions = {p['label']: p['score'] for p in predictions}
64
- return formatted_predictions
65
 
66
  # --- 3. Create the Gradio Interface ---
 
67
 
68
  DESCRIPTION = """
69
  ## Bilingual Gender Classifier
@@ -79,7 +89,6 @@ ARTICLE = """
79
  </div>
80
  """
81
 
82
- # Define some examples for users to try
83
  examples = [
84
  ["He went to the store to buy a new hammer."],
85
  ["La doctora le recetó un medicamento a su paciente."],
@@ -121,10 +130,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
121
  fn=classify_gender,
122
  inputs=text_input,
123
  outputs=output_label,
124
- api_name="classify" # You can add an API name for programmatic access
125
  )
126
 
127
  # --- 4. Launch the App ---
128
-
129
  if __name__ == "__main__":
130
  demo.launch()
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
+ import torch
4
  import os
5
 
# --- 1. Setup: Load Model and Define Device ---

# Get the Hugging Face token from the Space's secrets.
# An unset secret (None) and an empty secret ("") are both treated as missing,
# so a blank value fails here with a clear message instead of later as an
# opaque authentication error during the download.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("Hugging Face token not found. Please set the HF_TOKEN secret in your Space settings.")

# The ID of your private model on the Hub
MODEL_ID = "breadlicker45/bilingual-large-gender-v4-test"

# Set up device (use GPU if available, otherwise CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

print(f"Loading model: {MODEL_ID}...")
try:
    # Load tokenizer and model, providing the token for private access
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, token=HF_TOKEN)

    # Move the model to the selected device ONCE, at startup, rather than on
    # every prediction request.
    model.to(device)
    print("Model loaded successfully!")

except Exception as e:
    # Log the failure, then re-raise the active exception unchanged so the
    # Space stops with the original traceback.
    print(f"Error loading model: {e}")
    raise
33
 
34
+ # --- 2. Define the Manual Prediction Function ---
35
 
def classify_gender(text: str) -> dict:
    """
    Performs manual inference on the input text and returns a dictionary
    of label probabilities suitable for Gradio's Label component.

    Args:
        text: The text to classify.

    Returns:
        A ``{label_name: probability}`` dictionary with one entry per class
        in ``model.config.id2label``, or ``None`` when ``text`` is empty or
        contains only whitespace.
    """
    if not text or not text.strip():
        return None  # Handle empty input

    # 1. Tokenize the input text
    # The tokenizer prepares the text in the format the model expects.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)

    # 2. Move tokenized inputs to the same device as the model
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # 3. Get model predictions
    # `torch.no_grad()` disables gradient tracking, which is not needed for
    # inference.
    with torch.no_grad():
        logits = model(**inputs).logits

    # 4. Convert logits to probabilities over the label dimension
    probabilities = torch.nn.functional.softmax(logits, dim=-1)

    # 5. Format the output for Gradio's Label component
    # A single string was tokenized, so the batch holds one row; select it by
    # index. Unlike `squeeze()`, indexing never collapses the label dimension,
    # so the result is always a list even if the model has only one label.
    scores = probabilities[0].tolist()

    # `model.config.id2label` maps class index to label name,
    # e.g., {0: 'female', 1: 'male', 2: 'neutral'}
    id2label = model.config.id2label
    return {id2label[i]: score for i, score in enumerate(scores)}
 
74
 
75
  # --- 3. Create the Gradio Interface ---
76
+ # (This part remains the same as it correctly displays the dictionary output)
77
 
78
  DESCRIPTION = """
79
  ## Bilingual Gender Classifier
 
89
  </div>
90
  """
91
 
 
92
  examples = [
93
  ["He went to the store to buy a new hammer."],
94
  ["La doctora le recetó un medicamento a su paciente."],
 
130
  fn=classify_gender,
131
  inputs=text_input,
132
  outputs=output_label,
133
+ api_name="classify"
134
  )
135
 
136
  # --- 4. Launch the App ---
 
137
  if __name__ == "__main__":
138
  demo.launch()