Frenchizer committed
Commit 6b27907 · verified · Parent: 65f8dcc

Update app.py

Files changed (1):
  app.py  +19 -22
app.py CHANGED
@@ -4,13 +4,12 @@ from sklearn.metrics.pairwise import cosine_similarity
 import torch
 import numpy as np
 from gradio_client import Client
-
-# Cache the model and tokenizer using lru_cache
 from functools import lru_cache
 
+# Cache the model and tokenizer using lru_cache
 @lru_cache(maxsize=1)
 def load_model_and_tokenizer():
-    model_name = "./all-MiniLM-L6-v2"
+    model_name = "./all-MiniLM-L6-v2"  # Replace with your Space and model path
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModel.from_pretrained(model_name)
     return tokenizer, model
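Note on the loader: @lru_cache(maxsize=1) means the model and tokenizer are loaded once per process, and every later call is a cache hit. A minimal sketch of that behavior, assuming the ./all-MiniLM-L6-v2 weights are bundled with the Space (the Hub id "sentence-transformers/all-MiniLM-L6-v2" is our assumption for a remote fallback, not part of the diff):

from functools import lru_cache
from transformers import AutoModel, AutoTokenizer

@lru_cache(maxsize=1)
def load_model_and_tokenizer():
    # Local path as committed; "sentence-transformers/all-MiniLM-L6-v2"
    # would download the same model from the Hugging Face Hub instead.
    model_name = "./all-MiniLM-L6-v2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    return tokenizer, model

tokenizer, model = load_model_and_tokenizer()
# A repeat call returns the exact same objects -- no second load.
assert load_model_and_tokenizer()[1] is model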
@@ -36,33 +35,31 @@ labels = [
 
 @lru_cache(maxsize=1)
 def precompute_label_embeddings():
-    def encode_text(texts):
-        inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
-        with torch.no_grad():
-            outputs = model(**inputs)
-        return outputs.last_hidden_state.mean(dim=1).numpy()  # Use mean pooling for embeddings
-    return encode_text(labels)
+    inputs = tokenizer(labels, padding=True, truncation=True, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+    return outputs.last_hidden_state.mean(dim=1).numpy()  # Mean pooling for embeddings
 
 label_embeddings = precompute_label_embeddings()
 
-# Function to detect context
+# Function to detect context (optimized)
 def detect_context(input_text, high_confidence_threshold=0.9, fallback_threshold=0.8, max_results=3):
-    def encode_text(texts):
-        inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
-        with torch.no_grad():
-            outputs = model(**inputs)
-        return outputs.last_hidden_state.mean(dim=1).numpy()  # Use mean pooling for embeddings
+    # Encode the input text
+    inputs = tokenizer([input_text], padding=True, truncation=True, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+    input_embedding = outputs.last_hidden_state.mean(dim=1).numpy()  # Mean pooling for embedding
 
-    input_embedding = encode_text([input_text])
+    # Compute cosine similarities (optimized)
     similarities = cosine_similarity(input_embedding, label_embeddings)[0]
 
-    for label, score in zip(labels, similarities):
-        if score >= high_confidence_threshold:
-            return [label]
+    # Find top-N labels based on thresholds
+    top_indices = np.argsort(similarities)[-max_results:][::-1]
+    top_labels = [labels[i] for i in top_indices if similarities[i] >= fallback_threshold]
 
-    label_scores = [(label, score) for label, score in zip(labels, similarities) if score >= fallback_threshold]
-    sorted_labels = sorted(label_scores, key=lambda x: x[1], reverse=True)[:max_results]
-    return [label for label, score in sorted_labels] if sorted_labels else ["general"]
+    # Return high-confidence labels if any, otherwise fallback labels
+    high_conf_labels = [label for label in top_labels if similarities[labels.index(label)] >= high_confidence_threshold]
+    return high_conf_labels if high_conf_labels else top_labels[:max_results]
 
 # Translation client
 translation_client = Client("Frenchizer/space_3")
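One caveat a reviewer might flag in both new blocks: last_hidden_state.mean(dim=1) averages over padding positions whenever a batch mixes sequence lengths (as the labels batch in precompute_label_embeddings does). A mask-aware mean-pooling sketch -- the helper name encode_text echoes the removed nested function, but this version is ours, not part of the commit:

import torch

def encode_text(texts, tokenizer, model):
    # Tokenize with padding, then mean-pool only over real (non-pad) tokens.
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    mask = inputs["attention_mask"].unsqueeze(-1).float()   # (batch, seq, 1)
    summed = (outputs.last_hidden_state * mask).sum(dim=1)  # zero out pad tokens
    counts = mask.sum(dim=1).clamp(min=1.0)                 # real-token counts
    return (summed / counts).numpy()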
 
4
  import torch
5
  import numpy as np
6
  from gradio_client import Client
 
 
7
  from functools import lru_cache
8
 
9
+ # Cache the model and tokenizer using lru_cache
10
  @lru_cache(maxsize=1)
11
  def load_model_and_tokenizer():
12
+ model_name = "./all-MiniLM-L6-v2" # Replace with your Space and model path
13
  tokenizer = AutoTokenizer.from_pretrained(model_name)
14
  model = AutoModel.from_pretrained(model_name)
15
  return tokenizer, model
 
35
 
36
  @lru_cache(maxsize=1)
37
  def precompute_label_embeddings():
38
+ inputs = tokenizer(labels, padding=True, truncation=True, return_tensors="pt")
39
+ with torch.no_grad():
40
+ outputs = model(**inputs)
41
+ return outputs.last_hidden_state.mean(dim=1).numpy() # Mean pooling for embeddings
 
 
42
 
43
  label_embeddings = precompute_label_embeddings()
44
 
45
+ # Function to detect context (optimized)
46
  def detect_context(input_text, high_confidence_threshold=0.9, fallback_threshold=0.8, max_results=3):
47
+ # Encode the input text
48
+ inputs = tokenizer([input_text], padding=True, truncation=True, return_tensors="pt")
49
+ with torch.no_grad():
50
+ outputs = model(**inputs)
51
+ input_embedding = outputs.last_hidden_state.mean(dim=1).numpy() # Mean pooling for embedding
52
 
53
+ # Compute cosine similarities (optimized)
54
  similarities = cosine_similarity(input_embedding, label_embeddings)[0]
55
 
56
+ # Find top-N labels based on thresholds
57
+ top_indices = np.argsort(similarities)[-max_results:][::-1]
58
+ top_labels = [labels[i] for i in top_indices if similarities[i] >= fallback_threshold]
59
 
60
+ # Return high-confidence labels if any, otherwise fallback labels
61
+ high_conf_labels = [label for label in top_labels if similarities[labels.index(label)] >= high_confidence_threshold]
62
+ return high_conf_labels if high_conf_labels else top_labels[:max_results]
63
 
64
  # Translation client
65
  translation_client = Client("Frenchizer/space_3")
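Downstream, detect_context gates what gets sent to the translation Space. A hypothetical end-to-end call -- the api_name and argument order for translation_client.predict are assumptions to verify against space_3's API page, and note that the rewritten detect_context returns [] rather than ["general"] when nothing clears fallback_threshold:

text = "Please review the attached contract before signing."
contexts = detect_context(text)
if not contexts:  # the old code fell back to ["general"]; the new version can return []
    contexts = ["general"]
translation = translation_client.predict(text, api_name="/predict")  # assumed endpoint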