VasudevAdhikari committed on
Commit
a92fd71
·
1 Parent(s): e916274

Modify app.py with non-error typehints

Browse files
Files changed (1) hide show
  1. app.py +38 -20
app.py CHANGED
@@ -24,24 +24,38 @@ sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_m
24
  sentiment_model.eval()
25
 
26
 
 
 
 
 
 
 
27
  def nlp_encode_sentence(df: pd.DataFrame) -> pd.DataFrame:
28
 
 
 
 
29
  feature_rows = []
30
 
31
  for sentence in df["value"]:
32
 
33
- inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
 
 
 
 
 
34
 
35
  with torch.no_grad():
36
  outputs = bert_model(**inputs)
37
 
38
  cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze().numpy()
39
 
40
- embedding_mean = np.mean(cls_embedding)
41
- embedding_median = np.median(cls_embedding)
42
- embedding_std = np.std(cls_embedding)
43
- embedding_min = np.min(cls_embedding)
44
- embedding_max = np.max(cls_embedding)
45
 
46
  sentiment_inputs = sentiment_tokenizer(
47
  sentence,
@@ -54,27 +68,31 @@ def nlp_encode_sentence(df: pd.DataFrame) -> pd.DataFrame:
54
  sentiment_outputs = sentiment_model(**sentiment_inputs)
55
 
56
  probs = softmax(sentiment_outputs.logits.numpy()[0])
57
- sentiment_score = probs[2] - probs[0]
58
 
59
- feature_rows.append({
60
- "embedding_mean": embedding_mean,
61
- "embedding_median": embedding_median,
62
- "embedding_std": embedding_std,
63
- "embedding_min": embedding_min,
64
- "embedding_max": embedding_max,
65
- "sentiment_score": sentiment_score
66
- })
67
 
68
- features_df = pd.DataFrame(feature_rows)
 
69
 
70
- return features_df
71
 
 
 
 
72
 
73
  demo = gr.Interface(
74
  fn=nlp_encode_sentence,
75
- inputs=gr.Dataframe(),
76
- outputs=gr.Dataframe(),
77
  api_name="encode"
78
  )
79
 
80
- demo.launch()
 
 
24
  sentiment_model.eval()
25
 
26
 
27
+ # ==============================
28
+ # FUNCTION (SIGNATURE UNCHANGED)
29
+ # Input: DataFrame
30
+ # Output: DataFrame
31
+ # ==============================
32
+
33
  def nlp_encode_sentence(df: pd.DataFrame) -> pd.DataFrame:
34
 
35
+ # Convert safely in case input is list-of-lists
36
+ df = pd.DataFrame(df, columns=["value"])
37
+
38
  feature_rows = []
39
 
40
  for sentence in df["value"]:
41
 
42
+ inputs = tokenizer(
43
+ sentence,
44
+ return_tensors="pt",
45
+ truncation=True,
46
+ padding=True
47
+ )
48
 
49
  with torch.no_grad():
50
  outputs = bert_model(**inputs)
51
 
52
  cls_embedding = outputs.last_hidden_state[:, 0, :].squeeze().numpy()
53
 
54
+ embedding_mean = float(np.mean(cls_embedding))
55
+ embedding_median = float(np.median(cls_embedding))
56
+ embedding_std = float(np.std(cls_embedding))
57
+ embedding_min = float(np.min(cls_embedding))
58
+ embedding_max = float(np.max(cls_embedding))
59
 
60
  sentiment_inputs = sentiment_tokenizer(
61
  sentence,
 
68
  sentiment_outputs = sentiment_model(**sentiment_inputs)
69
 
70
  probs = softmax(sentiment_outputs.logits.numpy()[0])
71
+ sentiment_score = float(probs[2] - probs[0])
72
 
73
+ feature_rows.append([
74
+ embedding_mean,
75
+ embedding_median,
76
+ embedding_std,
77
+ embedding_min,
78
+ embedding_max,
79
+ sentiment_score
80
+ ])
81
 
82
+ # Return as array (important for Gradio compatibility)
83
+ return feature_rows
84
 
 
85
 
86
+ # ==============================
87
+ # GRADIO INTERFACE
88
+ # ==============================
89
 
90
  demo = gr.Interface(
91
  fn=nlp_encode_sentence,
92
+ inputs=gr.Dataframe(type="array"),
93
+ outputs=gr.Dataframe(type="array"),
94
  api_name="encode"
95
  )
96
 
97
+ if __name__ == "__main__":
98
+ demo.launch()