VasudevAdhikari committed on
Commit
e916274
·
1 Parent(s): ea041de

Update with real NLP encoder code

Browse files
Files changed (2) hide show
  1. app.py +77 -4
  2. requirements.txt +6 -0
app.py CHANGED
@@ -1,7 +1,80 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
2
 
3
def greet(name: object) -> str:
    """Return a greeting of the form ``Hello <name>!!``.

    The name is interpolated with an f-string, so non-string values are
    formatted with ``str()`` instead of raising ``TypeError`` the way
    ``"Hello " + name`` does.
    """
    return f"Hello {name}!!"
 
5
 
6
# Placeholder UI: one text box in, the greeting text out.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
# share=True asks Gradio to create a public share link for the app.
demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import torch
5
+ from transformers import (
6
+ AutoTokenizer,
7
+ AutoModel,
8
+ AutoModelForSequenceClassification
9
+ )
10
+ from scipy.special import softmax
11
 
12
+ # ==============================
13
+ # LOAD MODELS ONCE (GLOBAL)
14
+ # ==============================
15
 
16
# Encoder whose first-token ([CLS]) hidden state is summarised into features.
bert_model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
# eval() switches the module to inference mode and returns the module itself,
# so it can be chained onto the load call.
bert_model = AutoModel.from_pretrained(bert_model_name).eval()

# Sequence classifier whose logits are turned into a sentiment score.
sentiment_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(
    sentiment_model_name
).eval()
25
+
26
+
27
# Explicit token cap passed to both tokenizers so that truncation does not
# depend on each tokenizer's configured model maximum length.
# NOTE(review): 512 is assumed to fit both checkpoints' position limits and
# the sentiment tokenizer is believed not to declare a limit — confirm.
_MAX_TOKENS = 512

# Output columns, in order; also used to give an empty result a stable schema.
_FEATURE_COLUMNS = [
    "embedding_mean",
    "embedding_median",
    "embedding_std",
    "embedding_min",
    "embedding_max",
    "sentiment_score",
]


def _as_text(value) -> str:
    """Return a cell value as text, mapping missing cells (None/NaN) to ''."""
    if isinstance(value, str):
        return value
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    return str(value)


def _embedding_stats(text: str) -> dict:
    """Return summary statistics of the first-token embedding of *text*."""
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=_MAX_TOKENS,
        padding=True,
    )

    with torch.no_grad():
        outputs = bert_model(**inputs)

    # One sentence per call, so batch index 0; token index 0 is the first
    # ([CLS]) position of the sequence.
    cls_embedding = outputs.last_hidden_state[0, 0, :].numpy()

    return {
        "embedding_mean": np.mean(cls_embedding),
        "embedding_median": np.median(cls_embedding),
        "embedding_std": np.std(cls_embedding),
        "embedding_min": np.min(cls_embedding),
        "embedding_max": np.max(cls_embedding),
    }


def _sentiment_score(text: str):
    """Return softmax(logits)[2] - softmax(logits)[0] for *text*."""
    sentiment_inputs = sentiment_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=_MAX_TOKENS,
        padding=True,
    )

    with torch.no_grad():
        sentiment_outputs = sentiment_model(**sentiment_inputs)

    probs = softmax(sentiment_outputs.logits.numpy()[0])
    # NOTE(review): assumes the class order is [negative, neutral, positive],
    # which makes this "positive minus negative" in [-1, 1] — confirm against
    # the model card.
    return probs[2] - probs[0]


def nlp_encode_sentence(df: pd.DataFrame) -> pd.DataFrame:
    """Encode each sentence in ``df["value"]`` into six numeric features.

    For every row the sentence is run through the BERT encoder and the
    sentiment classifier loaded at module level. The first-token embedding
    is reduced to its mean, median, standard deviation, minimum and maximum,
    and the classifier output is reduced to a single sentiment score.

    Args:
        df: Frame with a ``"value"`` column holding one sentence per row.
            Missing cells (None/NaN) are encoded as the empty string and
            other non-string cells are converted with ``str()``.

    Returns:
        A frame with one row per input row, in the same order, and the
        columns ``embedding_mean``, ``embedding_median``, ``embedding_std``,
        ``embedding_min``, ``embedding_max`` and ``sentiment_score``. An
        empty input yields an empty frame with those same columns.

    Raises:
        KeyError: If *df* has no ``"value"`` column.
    """
    if "value" not in df.columns:
        raise KeyError(
            "input DataFrame must contain a 'value' column; "
            f"got columns {list(df.columns)!r}"
        )

    feature_rows = []
    for raw_sentence in df["value"]:
        text = _as_text(raw_sentence)
        row = _embedding_stats(text)
        row["sentiment_score"] = _sentiment_score(text)
        feature_rows.append(row)

    # Passing the column list keeps the schema stable when there are no rows.
    return pd.DataFrame(feature_rows, columns=_FEATURE_COLUMNS)
71
+
72
+
73
# Gradio front end: takes a table of sentences and returns the feature table
# produced by nlp_encode_sentence, exposed under the "encode" API name.
demo = gr.Interface(
    fn=nlp_encode_sentence,
    # The handler reads df["value"], so the input grid needs a column with
    # exactly that header; without it the default grid has no such column.
    inputs=gr.Dataframe(headers=["value"]),
    outputs=gr.Dataframe(),
    api_name="encode",
)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ numpy
4
+ torch
5
+ transformers
6
+ scipy