ilan541 committed on
Commit
65367f6
·
1 Parent(s): 9b4bd92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -2
app.py CHANGED
@@ -9,12 +9,39 @@ tokenizer = AutoTokenizer.from_pretrained('roberta-base')
9
  from transformers import TFAutoModelForSequenceClassification
10
  model = TFAutoModelForSequenceClassification.from_pretrained("ilan541/OncUponTim")
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def predict(your_text):
13
  # split the text
 
 
 
 
14
 
15
  # inference
16
- inp = tokenizer(your_text, return_tensors='tf')
17
- if np.argmax(softmax(model(inp).logits)) == 0:
18
  return 'This content is not of high standard. It needs editing. '
19
  else:
20
  return 'Promising content! Our algorithm predicts it will be very popular.'
 
9
  from transformers import TFAutoModelForSequenceClassification
10
  model = TFAutoModelForSequenceClassification.from_pretrained("ilan541/OncUponTim")
11
 
12
def split_text(text, nb_splits):
    """Split ``text`` into at most ``nb_splits`` contiguous, near-equal pieces.

    The pieces are returned in order and concatenate back to ``text``.
    When the length is not divisible by ``nb_splits`` the leading pieces
    receive one extra character each, so no tiny trailing fragment is
    produced and the number of pieces never exceeds ``nb_splits``.
    Empty pieces are dropped, so a text shorter than ``nb_splits`` yields
    one piece per character and an empty text yields an empty list.

    Args:
        text: The string to split.
        nb_splits: The desired number of pieces.

    Returns:
        A list of non-empty substrings of ``text``.

    Raises:
        ZeroDivisionError: If ``nb_splits`` is 0.
    """
    # base: characters every piece gets; extra: how many pieces get one more.
    base, extra = divmod(len(text), nb_splits)

    pieces = []
    start = 0
    for index in range(nb_splits):
        end = start + base + (1 if index < extra else 0)
        # Skip zero-length pieces (text shorter than nb_splits).
        if end > start:
            pieces.append(text[start:end])
        start = end
    return pieces
23
+
24
def get_probs(list_of_portions):
    """Average the classifier's raw logits over several text portions.

    Each portion is tokenized and passed through the model on its own;
    the logits for class 0 and class 1 are then averaged across portions.
    Despite the function name, the returned values are mean logits: no
    softmax is applied here.

    Args:
        list_of_portions: An iterable of strings to score.

    Returns:
        A tuple ``(mean_logit_0, mean_logit_1)``.

    Raises:
        ValueError: If ``list_of_portions`` is empty, because a mean over
            nothing would be ``nan`` and silently select class 0 downstream.
    """
    portions = list(list_of_portions)
    if not portions:
        raise ValueError("list_of_portions must contain at least one text")

    logits_0 = []
    logits_1 = []
    for portion in portions:
        # truncation=True keeps an over-long portion within the tokenizer's
        # maximum length instead of feeding the model too many tokens.
        inp = tokenizer(portion, return_tensors='tf', truncation=True)
        logits = np.asarray(model(inp).logits)
        logits_0.append(logits[:, 0])
        logits_1.append(logits[:, 1])

    return np.mean(logits_0), np.mean(logits_1)
34
+
35
+
36
  def predict(your_text):
37
  # split the text
38
+ nb_splits = 3
39
+ splits = split_text(text, nb_splits)
40
+
41
+ y_logits_0, y_logits_1 = get_probs(splits)
42
 
43
  # inference
44
+ if np.argmax([y_logits_0, y_logits_1]) == 0:
 
45
  return 'This content is not of high standard. It needs editing. '
46
  else:
47
  return 'Promising content! Our algorithm predicts it will be very popular.'