Frenchizer committed on
Commit
a74487c
·
verified ·
1 Parent(s): 31cbd53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -78
app.py CHANGED
@@ -3,14 +3,13 @@ from transformers import AutoTokenizer, AutoModel
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import torch
5
  import numpy as np
6
- import json
7
- import requests
8
  from functools import lru_cache
9
 
10
  # Cache the model and tokenizer using lru_cache
11
  @lru_cache(maxsize=1)
12
  def load_model_and_tokenizer():
13
- model_name = "./all-MiniLM-L6-v2" # Adjust if needed
14
  tokenizer = AutoTokenizer.from_pretrained(model_name)
15
  model = AutoModel.from_pretrained(model_name)
16
  return tokenizer, model
@@ -20,82 +19,19 @@ tokenizer, model = load_model_and_tokenizer()
20
 
21
  # Precompute label embeddings
22
  labels = [
23
- "aerospace", "anatomy", "anthropology", "art", "automotive", "blockchain",
24
- "biology", "chemistry", "cryptocurrency", "data science", "design", "e-commerce",
25
- "education", "engineering", "entertainment", "environment", "fashion", "finance",
26
- "food commerce", "gaming", "healthcare", "history", "information technology",
27
- "legal", "machine learning", "marketing", "medicine", "music", "philosophy",
28
- "physics", "politics", "real estate", "retail", "robotics", "social media",
29
- "sports", "technical", "tourism", "travel"
 
 
 
30
  ]
31
 
32
  tones = [
33
  "formal", "positive", "negative", "poetic", "polite", "subtle", "casual", "neutral",
34
- "informal", "pompous", "sustained", "rude"
35
- ]
36
-
37
- # Compute label embeddings
38
- def get_label_embeddings():
39
- with torch.no_grad():
40
- tokenized = tokenizer(labels, padding=True, truncation=True, return_tensors="pt")
41
- label_embeddings = model(**tokenized).last_hidden_state[:, 0, :].numpy()
42
- return label_embeddings
43
-
44
- label_embeddings = get_label_embeddings()
45
-
46
- def detect_context(text: str):
47
- # Encode input text
48
- tokenized = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
49
- with torch.no_grad():
50
- text_embedding = model(**tokenized).last_hidden_state[:, 0, :].numpy()
51
-
52
- # Compute similarity scores
53
- similarities = cosine_similarity(text_embedding, label_embeddings)[0]
54
-
55
- # Get best matching context
56
- best_index = np.argmax(similarities)
57
- detected_context = labels[best_index]
58
-
59
- return detected_context
60
-
61
- def process_and_translate(text: str):
62
- detected_context = detect_context(text)
63
- try:
64
- print(f"Sending to space_7: {text}") # Debugging
65
-
66
- translation_response = requests.post(
67
- "https://api.gradio.app/v2/Frenchizer/space_18/predict",
68
- json={"data": [text]} # Make sure this is correctly formatted
69
- )
70
-
71
- print("Raw response from space_7:", translation_response.text) # Debugging
72
-
73
- if translation_response.status_code != 200 or not translation_response.text.strip():
74
- return json.dumps({
75
- "error": "space_7 returned an empty response",
76
- "context": detected_context
77
- })
78
-
79
- response_json = translation_response.json()
80
- return json.dumps({
81
- "context": detected_context,
82
- "translation": response_json.get("data", [""])[0]
83
- })
84
-
85
- except Exception as e:
86
- return json.dumps({
87
- "error": f"Exception: {str(e)}",
88
- "context": detected_context
89
- })
90
-
91
-
92
-
93
- # Define Gradio interface
94
- with gr.Blocks() as interface:
95
- input_text = gr.Textbox(label="Input Text")
96
- output_json = gr.JSON(label="Context & Translation")
97
- process_button = gr.Button("Process & Translate")
98
- process_button.click(fn=process_and_translate, inputs=[input_text], outputs=[output_json])
99
-
100
- if __name__ == "__main__":
101
- interface.launch()
 
3
  from sklearn.metrics.pairwise import cosine_similarity
4
  import torch
5
  import numpy as np
6
+ from gradio_client import Client
 
7
  from functools import lru_cache
8
 
9
  # Cache the model and tokenizer using lru_cache
10
  @lru_cache(maxsize=1)
11
  def load_model_and_tokenizer():
12
+ model_name = "./all-MiniLM-L6-v2" # Replace with your Space and model path
13
  tokenizer = AutoTokenizer.from_pretrained(model_name)
14
  model = AutoModel.from_pretrained(model_name)
15
  return tokenizer, model
 
19
 
20
  # Precompute label embeddings
21
  labels = [
22
+ "aerospace", "anatomy", "anthropology", "art",
23
+ "automotive", "blockchain", "biology", "chemistry",
24
+ "cryptocurrency", "data science", "design", "e-commerce",
25
+ "education", "engineering", "entertainment", "environment",
26
+ "fashion", "finance", "food commerce", "gaming",
27
+ "healthcare", "history", "information technology",
28
+ "legal", "machine learning", "marketing", "medicine",
29
+ "music", "philosophy", "physics", "politics", "real estate", "retail",
30
+ "robotics", "social media", "sports", "technical",
31
+ "tourism", "travel"
32
  ]
33
 
34
  tones = [
35
  "formal", "positive", "negative", "poetic", "polite", "subtle", "casual", "neutral",
36
+ "informal", "pompous", "sustained", "rude", "sustained",
37
+ interface.launch()