Spaces:

Frenchizer
/

space_2

Build error

App Files Files Community

Frenchizer committed on Jan 25, 2025

Commit

5ad0807

verified ·

1 Parent(s): 7e40604

Update app.py

Browse files

Files changed (1) hide show

app.py +53 -10

app.py CHANGED Viewed

@@ -26,15 +26,15 @@ labels = [
     "literature", "machine learning", "marketing", "medicine",
     "music", "personal development", "philosophy", "physics",
     "politics", "poetry", "programming", "real estate", "retail",
-    "robotics", "slang", "social media", "sports", "sustained",
-    "technical", "theater", "tourism", "travel"
 ]
 def softmax_with_temperature(logits, temperature=1.0):
     exp_logits = np.exp(logits / temperature)
     return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)
-def detect_context(input_text, temperature=2.0, score_threshold=0.05):
     # Tokenize input text
     inputs = context_tokenizer(input_text, return_tensors="np", padding=True, truncation=True, max_length=512)
     input_ids = inputs["input_ids"].astype(np.int64)
@@ -50,9 +50,6 @@ def detect_context(input_text, temperature=2.0, score_threshold=0.05):
         "attention_mask": attention_mask
     })
-    # Debugging: Check output shape
-    print(f"Logits shape: {outputs[0].shape}")  # Expected: (batch_size, num_labels)
     logits = outputs[0][0]  # Assuming batch size 1; take the first set of logits
     # Debugging: Print raw logits
@@ -61,16 +58,62 @@ def detect_context(input_text, temperature=2.0, score_threshold=0.05):
     # Apply softmax with temperature
     scores = softmax_with_temperature(logits, temperature=temperature)
-    # Debugging: Print scores
-    print(f"Scores with softmax: {scores}")
     # Pair labels with scores
     label_scores = [(label, score) for label, score in zip(labels, scores)]
 gr.Interface(
     fn=process_request,
     inputs="text",
     outputs="text",
     live=True
 ).launch()

     "literature", "machine learning", "marketing", "medicine",
     "music", "personal development", "philosophy", "physics",
     "politics", "poetry", "programming", "real estate", "retail",
+    "robotics", "slang", "social media", "speech", "sports",
+    "sustained", "technical", "theater", "tourism", "travel"
 ]
 def softmax_with_temperature(logits, temperature=1.0):
     """Return softmax probabilities of ``logits`` scaled by ``temperature``.

     The logits are divided by ``temperature`` and normalised along the last
     axis, so each row of the result sums to 1.

     Args:
         logits: Array-like of raw scores; the last axis is the class axis.
         temperature: Divisor applied to the logits before the softmax.

     Returns:
         An array with the same shape as ``logits`` holding probabilities.
     """
     scaled = np.asarray(logits) / temperature
     if scaled.size == 0:
         # Nothing to normalise; np.max would raise on an empty reduction.
         return scaled
     # Softmax is invariant to subtracting a per-row constant. Removing the
     # row maximum keeps every exponent <= 0, so np.exp cannot overflow to
     # inf and produce nan probabilities for large logits.
     shifted = scaled - np.max(scaled, axis=-1, keepdims=True)
     exp_logits = np.exp(shifted)
     return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)
+def detect_context(input_text, temperature=2.0, top_n=3, score_threshold=0.05):
     # Tokenize input text
     inputs = context_tokenizer(input_text, return_tensors="np", padding=True, truncation=True, max_length=512)
     input_ids = inputs["input_ids"].astype(np.int64)
         "attention_mask": attention_mask
     })
     logits = outputs[0][0]  # Assuming batch size 1; take the first set of logits
     # Debugging: Print raw logits
     # Apply softmax with temperature
     scores = softmax_with_temperature(logits, temperature=temperature)
     # Pair labels with scores
     label_scores = [(label, score) for label, score in zip(labels, scores)]
+    # Sort by scores in descending order
+    sorted_labels = sorted(label_scores, key=lambda x: x[1], reverse=True)
+    # Filter by threshold and return top_n contexts
+    filtered_labels = [label for label, score in sorted_labels if score > score_threshold]
+    top_contexts = filtered_labels[:top_n]
+    print(f"All scores: {label_scores}")  # Debugging: Print all scores
+    print(f"Selected contexts: {top_contexts}")  # Debugging: Print selected contexts
+    return top_contexts if top_contexts else ["general"]
+def translate_text(input_text):
+    tokenized_input = tokenizer(
+        input_text, return_tensors="np",
+        padding=True, truncation=True, max_length=512
+    )
+    input_ids = tokenized_input["input_ids"].astype(np.int64)
+    attention_mask = tokenized_input["attention_mask"].astype(np.int64)
+    decoder_start_token_id = tokenizer.cls_token_id or tokenizer.pad_token_id
+    decoder_input_ids = np.array([[decoder_start_token_id]], dtype=np.int64)
+    for _ in range(512):
+        outputs = translation_session.run(
+            None,
+            {
+                "input_ids": input_ids,
+                "attention_mask": attention_mask,
+                "decoder_input_ids": decoder_input_ids,
+            }
+        )
+        logits = outputs[0]
+        next_token_id = np.argmax(logits[:, -1, :], axis=-1).item()
+        decoder_input_ids = np.concatenate(
+            [decoder_input_ids, np.array([[next_token_id]], dtype=np.int64)], axis=1
+        )
+        if next_token_id == tokenizer.eos_token_id:
+            break
+    return tokenizer.decode(decoder_input_ids[0], skip_special_tokens=True)
+def process_request(input_text):
+    context = detect_context(input_text)
+    translation = translate_text(input_text)  # Translate without needing to pass context explicitly
+    return translation
 # Text-in / text-out Gradio UI backed by process_request. live=True is
 # passed so the interface updates as the input changes (see the Gradio
 # Interface documentation for the exact trigger semantics).
 demo = gr.Interface(
     fn=process_request,
     inputs="text",
     outputs="text",
     live=True,
 )
 demo.launch()