Frenchizer committed on
Commit
5ad0807
·
verified ·
1 Parent(s): 7e40604

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -10
app.py CHANGED
@@ -26,15 +26,15 @@ labels = [
26
  "literature", "machine learning", "marketing", "medicine",
27
  "music", "personal development", "philosophy", "physics",
28
  "politics", "poetry", "programming", "real estate", "retail",
29
- "robotics", "slang", "social media", "sports", "sustained",
30
- "technical", "theater", "tourism", "travel"
31
  ]
32
 
33
  def softmax_with_temperature(logits, temperature=1.0):
34
  exp_logits = np.exp(logits / temperature)
35
  return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)
36
 
37
- def detect_context(input_text, temperature=2.0, score_threshold=0.05):
38
  # Tokenize input text
39
  inputs = context_tokenizer(input_text, return_tensors="np", padding=True, truncation=True, max_length=512)
40
  input_ids = inputs["input_ids"].astype(np.int64)
@@ -50,9 +50,6 @@ def detect_context(input_text, temperature=2.0, score_threshold=0.05):
50
  "attention_mask": attention_mask
51
  })
52
 
53
- # Debugging: Check output shape
54
- print(f"Logits shape: {outputs[0].shape}") # Expected: (batch_size, num_labels)
55
-
56
  logits = outputs[0][0] # Assuming batch size 1; take the first set of logits
57
 
58
  # Debugging: Print raw logits
@@ -61,16 +58,62 @@ def detect_context(input_text, temperature=2.0, score_threshold=0.05):
61
  # Apply softmax with temperature
62
  scores = softmax_with_temperature(logits, temperature=temperature)
63
 
64
- # Debugging: Print scores
65
- print(f"Scores with softmax: {scores}")
66
-
67
  # Pair labels with scores
68
  label_scores = [(label, score) for label, score in zip(labels, scores)]
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  gr.Interface(
71
  fn=process_request,
72
  inputs="text",
73
  outputs="text",
74
  live=True
75
  ).launch()
76
-
 
26
  "literature", "machine learning", "marketing", "medicine",
27
  "music", "personal development", "philosophy", "physics",
28
  "politics", "poetry", "programming", "real estate", "retail",
29
+ "robotics", "slang", "social media", "speech", "sports",
30
+ "sustained", "technical", "theater", "tourism", "travel"
31
  ]
32
 
33
  def softmax_with_temperature(logits, temperature=1.0):
34
  exp_logits = np.exp(logits / temperature)
35
  return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)
36
 
37
+ def detect_context(input_text, temperature=2.0, top_n=3, score_threshold=0.05):
38
  # Tokenize input text
39
  inputs = context_tokenizer(input_text, return_tensors="np", padding=True, truncation=True, max_length=512)
40
  input_ids = inputs["input_ids"].astype(np.int64)
 
50
  "attention_mask": attention_mask
51
  })
52
 
 
 
 
53
  logits = outputs[0][0] # Assuming batch size 1; take the first set of logits
54
 
55
  # Debugging: Print raw logits
 
58
  # Apply softmax with temperature
59
  scores = softmax_with_temperature(logits, temperature=temperature)
60
 
 
 
 
61
  # Pair labels with scores
62
  label_scores = [(label, score) for label, score in zip(labels, scores)]
63
 
64
+ # Sort by scores in descending order
65
+ sorted_labels = sorted(label_scores, key=lambda x: x[1], reverse=True)
66
+
67
+ # Filter by threshold and return top_n contexts
68
+ filtered_labels = [label for label, score in sorted_labels if score > score_threshold]
69
+ top_contexts = filtered_labels[:top_n]
70
+
71
+ print(f"All scores: {label_scores}") # Debugging: Print all scores
72
+ print(f"Selected contexts: {top_contexts}") # Debugging: Print selected contexts
73
+
74
+ return top_contexts if top_contexts else ["general"]
75
+
76
def translate_text(input_text, max_new_tokens=512):
    """Translate ``input_text`` by greedy decoding with the translation session.

    Relies on the module-level ``tokenizer`` and ``translation_session``
    objects, which are defined outside this function.

    Args:
        input_text: Text to translate. The ``.item()`` call in the decode
            loop requires a single sequence (batch size 1).
        max_new_tokens: Upper bound on the number of decoding steps.
            Defaults to 512, the limit that was previously hard-coded.

    Returns:
        The decoded string with special tokens removed.
    """
    tokenized_input = tokenizer(
        input_text, return_tensors="np",
        padding=True, truncation=True, max_length=512
    )

    input_ids = tokenized_input["input_ids"].astype(np.int64)
    attention_mask = tokenized_input["attention_mask"].astype(np.int64)

    # Compare against None explicitly: with `or`, a valid token id of 0
    # is falsy and would be silently replaced by the pad token id.
    decoder_start_token_id = tokenizer.cls_token_id
    if decoder_start_token_id is None:
        decoder_start_token_id = tokenizer.pad_token_id
    decoder_input_ids = np.array([[decoder_start_token_id]], dtype=np.int64)

    # Greedy decoding: every step feeds the whole prefix generated so far
    # back into the session and appends the single most likely next token.
    for _ in range(max_new_tokens):
        outputs = translation_session.run(
            None,
            {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
                "decoder_input_ids": decoder_input_ids,
            },
        )

        # NOTE(review): outputs[0] is indexed as [:, -1, :], so it is
        # presumably (batch, sequence, vocab) logits - confirm against the
        # exported model.
        logits = outputs[0]
        next_token_id = np.argmax(logits[:, -1, :], axis=-1).item()
        decoder_input_ids = np.concatenate(
            [decoder_input_ids, np.array([[next_token_id]], dtype=np.int64)],
            axis=1,
        )

        # Stop once end-of-sequence is produced; the EOS id stays in the
        # sequence but is dropped by skip_special_tokens below.
        if next_token_id == tokenizer.eos_token_id:
            break

    return tokenizer.decode(decoder_input_ids[0], skip_special_tokens=True)
108
+
109
def process_request(input_text):
    """Handle one request from the Gradio interface and return the translation.

    Args:
        input_text: Text entered by the user.

    Returns:
        The translated text, or an empty string when there is nothing to
        translate.
    """
    # The interface is created with live=True, so this handler is also
    # called while the text box is empty or holds only whitespace. Skip the
    # model calls in that case instead of running a full decode loop.
    if not input_text or not input_text.strip():
        return ""

    # The detected context is not consumed by the translation step yet; the
    # call is kept so that its debug output is still printed.
    detect_context(input_text)

    return translate_text(input_text)
113
+
114
  gr.Interface(
115
  fn=process_request,
116
  inputs="text",
117
  outputs="text",
118
  live=True
119
  ).launch()