space_21

Sleeping

App Files Files Community

Frenchizer commited on Feb 11, 2025

Commit

c1151ec

verified ·

1 Parent(s): e47e728

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -106

app.py CHANGED Viewed

@@ -3,13 +3,14 @@ from transformers import AutoTokenizer, AutoModel
 from sklearn.metrics.pairwise import cosine_similarity
 import torch
 import numpy as np
-from gradio_client import Client
 from functools import lru_cache
 # Cache the model and tokenizer using lru_cache
 @lru_cache(maxsize=1)
 def load_model_and_tokenizer():
-    model_name = "./all-MiniLM-L6-v2"  # Replace with your Space and model path
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModel.from_pretrained(model_name)
     return tokenizer, model
@@ -19,117 +20,77 @@ tokenizer, model = load_model_and_tokenizer()
 # Precompute label embeddings
 labels = [
-    "aerospace", "anatomy", "anthropology", "art",
-    "automotive", "blockchain", "biology", "chemistry",
-    "cryptocurrency", "data science", "design", "e-commerce",
-    "education", "engineering", "entertainment", "environment",
-    "fashion", "finance", "food commerce", "gaming",
-    "healthcare", "history", "information technology",
-    "legal", "machine learning", "marketing", "medicine",
-    "music", "philosophy", "physics", "politics", "real estate", "retail",
-    "robotics", "social media", "sports", "technical",
-    "tourism", "travel"
 ]
 tones = [
     "formal", "positive", "negative", "poetic", "polite", "subtle", "casual", "neutral",
-    "informal", "pompous", "sustained", "rude", "sustained",
-    "sophisticated", "playful", "serious", "friendly"
 ]
-styles = [
-    "poetry", "novel", "theater", "slang", "speech", "keywords", "html", "programming"
-]
-gender_number = [
-    "masculine singular", "masculine plural", "feminine singular", "feminine plural"
-]
-@lru_cache(maxsize=1)
-def precompute_label_embeddings():
-    inputs = tokenizer(labels, padding=True, truncation=True, return_tensors="pt")
     with torch.no_grad():
-        outputs = model(**inputs)
-    return outputs.last_hidden_state.mean(dim=1).numpy()  # Mean pooling for embeddings
-label_embeddings = precompute_label_embeddings()
-# Softmax function to convert scores to probabilities
-def softmax(x):
-    exp_x = np.exp(x - np.max(x))  # Subtract max for numerical stability
-    return exp_x / exp_x.sum()
-# Function to detect context
-def detect_context(input_text, threshold=0.03):
-    # Encode the input text
-    inputs = tokenizer([input_text], padding=True, truncation=True, return_tensors="pt")
     with torch.no_grad():
-        outputs = model(**inputs)
-    input_embedding = outputs.last_hidden_state.mean(dim=1).numpy()  # Mean pooling for embedding
-    # Compute cosine similarities
-    similarities = cosine_similarity(input_embedding, label_embeddings)[0]
-    # Apply softmax to convert similarities to probabilities
-    probabilities = softmax(similarities)
-    # Pair each label with its probability
-    label_probabilities = list(zip(labels, probabilities))
-    # Filter contexts with confidence >= threshold
-    high_confidence_contexts = [(label, score) for label, score in label_probabilities if score >= threshold]
-    # If no contexts meet the threshold, default to "general"
-    if not high_confidence_contexts:
-        high_confidence_contexts = [("general", 1.0)]  # Assign a default score of 1.0 for "general"
-    return high_confidence_contexts
-# Mock translation clients for different contexts
-def get_translation_client(context):
-    """
-    Returns the appropriate Hugging Face Space client for the given context.
-    For now, all contexts use the same mock space.
-    """
-    return Client("Frenchizer/space_7")  # Replace with actual Space paths for each context
-def translate_text(input_text, context):
-    """
-    Translates the input text using the appropriate model for the given context.
-    """
-    client = get_translation_client(context)
-    return client.predict(input_text)
-def process_request(input_text):
-    # Step 1: Detect context
-    context_results = detect_context(input_text)
-    # Step 2: Translate the text for each context
-    translations = {}
-    for context, score in context_results:
-        translations[context] = translate_text(input_text, context)
-    # Step 3: Print the list of high-confidence contexts and translations
-    print("High-confidence contexts (score >= 0.022):", context_results)
-    print("Translations:", translations)
-    # Return the translations and contexts
-    return translations, context_results
-# Gradio interface
-def gradio_interface(input_text):
-    translation, contexts = process_request(input_text)
-    # Format the output
-    output = f"{translation}\n"
-    return output.strip()
-# Create the Gradio interface
-interface = gr.Interface(
-    fn=gradio_interface,
-    inputs="text",
-    outputs="text",
-    title="Frenchizer",
-    description="Translate text from English to French with context detection and threshold."
-)
-interface.launch()

 from sklearn.metrics.pairwise import cosine_similarity
 import torch
 import numpy as np
+import json
+import requests
 from functools import lru_cache
 # Cache the model and tokenizer using lru_cache
 @lru_cache(maxsize=1)
 def load_model_and_tokenizer():
+    model_name = "./all-MiniLM-L6-v2"  # Adjust if needed
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModel.from_pretrained(model_name)
     return tokenizer, model
 # Precompute label embeddings
 labels = [
+    "aerospace", "anatomy", "anthropology", "art", "automotive", "blockchain",
+    "biology", "chemistry", "cryptocurrency", "data science", "design", "e-commerce",
+    "education", "engineering", "entertainment", "environment", "fashion", "finance",
+    "food commerce", "gaming", "healthcare", "history", "information technology",
+    "legal", "machine learning", "marketing", "medicine", "music", "philosophy",
+    "physics", "politics", "real estate", "retail", "robotics", "social media",
+    "sports", "technical", "tourism", "travel"
 ]
 tones = [
     "formal", "positive", "negative", "poetic", "polite", "subtle", "casual", "neutral",
+    "informal", "pompous", "sustained", "rude"
 ]
+# Compute label embeddings
+def get_label_embeddings():
     with torch.no_grad():
+        tokenized = tokenizer(labels, padding=True, truncation=True, return_tensors="pt")
+        label_embeddings = model(**tokenized).last_hidden_state[:, 0, :].numpy()
+    return label_embeddings
+label_embeddings = get_label_embeddings()
+def detect_context(text: str):
+    # Encode input text
+    tokenized = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
     with torch.no_grad():
+        text_embedding = model(**tokenized).last_hidden_state[:, 0, :].numpy()
+    # Compute similarity scores
+    similarities = cosine_similarity(text_embedding, label_embeddings)[0]
+    # Get best matching context
+    best_index = np.argmax(similarities)
+    detected_context = labels[best_index]
+    return detected_context
+def process_and_translate(text: str):
+    detected_context = detect_context(text)
+    try:
+        # Forward text and detected context to space_7 for translation
+        translation_response = requests.post(
+            "https://api.gradio.app/v2/Frenchizer/space_7/predict",
+            json={"data": [text, detected_context]}
+        ).json()
+        if "error" in translation_response:
+            return json.dumps({
+                "error": "Translation failed",
+                "context": detected_context
+            })
+        return json.dumps({
+            "context": detected_context,
+            "translation": translation_response["data"][0]
+        })
+    except Exception as e:
+        return json.dumps({
+            "error": str(e),
+            "context": detected_context
+        })
+# Define Gradio interface
+with gr.Blocks() as interface:
+    input_text = gr.Textbox(label="Input Text")
+    output_json = gr.JSON(label="Context & Translation")
+    process_button = gr.Button("Process & Translate")
+    process_button.click(fn=process_and_translate, inputs=[input_text], outputs=[output_json])
+if __name__ == "__main__":
+    interface.launch()