Frenchizer committed on
Commit
a990647
·
verified ·
1 Parent(s): 0ccd95b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -28
app.py CHANGED
@@ -23,14 +23,26 @@ labels = [
23
  "automotive", "blockchain", "biology", "chemistry",
24
  "cryptocurrency", "data science", "design", "e-commerce",
25
  "education", "engineering", "entertainment", "environment",
26
- "fashion", "finance", "food commerce", "general",
27
- "gaming", "healthcare", "history", "html",
28
- "information technology", "IT", "keywords", "legal",
29
- "literature", "machine learning", "marketing", "medicine",
30
- "music", "personal development", "philosophy", "physics",
31
- "politics", "poetry", "programming", "real estate", "retail",
32
- "robotics", "slang", "social media", "speech", "sports",
33
- "sustained", "technical", "theater", "tourism", "travel"
 
 
 
 
 
 
 
 
 
 
 
 
34
  ]
35
 
36
  @lru_cache(maxsize=1)
@@ -48,7 +60,7 @@ def softmax(x):
48
  return exp_x / exp_x.sum()
49
 
50
  # Function to detect context
51
- def detect_context(input_text, top_n=3):
52
  # Encode the input text
53
  inputs = tokenizer([input_text], padding=True, truncation=True, return_tensors="pt")
54
  with torch.no_grad():
@@ -64,14 +76,14 @@ def detect_context(input_text, top_n=3):
64
  # Pair each label with its probability
65
  label_probabilities = list(zip(labels, probabilities))
66
 
67
- # Sort by probability in descending order
68
- label_probabilities.sort(key=lambda x: x[1], reverse=True)
69
 
70
- # Select the top N contexts
71
- top_contexts = label_probabilities[:top_n]
 
72
 
73
- # Return both the top N contexts and all context scores
74
- return top_contexts, label_probabilities
75
 
76
  # Translation client
77
  translation_client = Client("Frenchizer/space_7")
@@ -85,27 +97,21 @@ def process_request(input_text):
85
  translation = translate_text(input_text)
86
 
87
  # Step 2: Detect context
88
- top_contexts, all_contexts = detect_context(input_text)
89
 
90
- # Step 3: Print the list of high-confidence contexts and all context scores
91
- print("Detected Contexts (Top 3):", top_contexts)
92
- print("All Context Scores:")
93
- for context, score in all_contexts:
94
- print(f"- {context}: {score:.4f}")
95
 
96
  # Return the translation and contexts
97
- return translation, top_contexts, all_contexts
98
 
99
  # Gradio interface
100
  def gradio_interface(input_text):
101
- translation, top_contexts, all_contexts = process_request(input_text)
102
  # Format the output
103
- output = f"Translation: {translation}\n\nDetected Contexts (Top 3):\n"
104
- for context, score in top_contexts:
105
  output += f"- {context} (confidence: {score:.4f})\n"
106
- output += "\nAll Context Scores:\n"
107
- for context, score in all_contexts:
108
- output += f"- {context}: {score:.4f}\n"
109
  return output.strip()
110
 
111
  # Create the Gradio interface
 
23
  "automotive", "blockchain", "biology", "chemistry",
24
  "cryptocurrency", "data science", "design", "e-commerce",
25
  "education", "engineering", "entertainment", "environment",
26
+ "fashion", "finance", "food commerce", "gaming",
27
+ "healthcare", "history", "information technology",
28
+ "legal", "machine learning", "marketing", "medicine",
29
+ "music", "philosophy", "physics", "politics", "real estate", "retail",
30
+ "robotics", "social media", "sports", "technical",
31
+ "tourism", "travel"
32
+ ]
33
+
34
+ tones = [
35
+ "formal", "positive", "negative", "poetic", "polite", "subtle", "casual", "neutral",
36
+ "informal", "pompous", "sustained", "rude", "sustained",
37
+ "sophisticated", "playful", "serious", "friendly"
38
+ ]
39
+
40
+ styles = [
41
+ "poetry", "novel", "theater", "slang", "speech", "keywords", "html", "programming"
42
+ ]
43
+
44
+ gender_number = [
45
+ "masculine singular", "masculine plural", "feminine singular", "feminine plural"
46
  ]
47
 
48
  @lru_cache(maxsize=1)
 
60
  return exp_x / exp_x.sum()
61
 
62
  # Function to detect context
63
+ def detect_context(input_text, threshold=0.022):
64
  # Encode the input text
65
  inputs = tokenizer([input_text], padding=True, truncation=True, return_tensors="pt")
66
  with torch.no_grad():
 
76
  # Pair each label with its probability
77
  label_probabilities = list(zip(labels, probabilities))
78
 
79
+ # Filter contexts with confidence >= threshold
80
+ high_confidence_contexts = [(label, score) for label, score in label_probabilities if score >= threshold]
81
 
82
+ # If no contexts meet the threshold, default to "general"
83
+ if not high_confidence_contexts:
84
+ high_confidence_contexts = [("general", 1.0)] # Assign a default score of 1.0 for "general"
85
 
86
+ return high_confidence_contexts
 
87
 
88
  # Translation client
89
  translation_client = Client("Frenchizer/space_7")
 
97
  translation = translate_text(input_text)
98
 
99
  # Step 2: Detect context
100
+ context_results = detect_context(input_text)
101
 
102
+ # Step 3: Print the list of high-confidence contexts
103
+ print("High-confidence contexts (score >= 0.022):", context_results)
 
 
 
104
 
105
  # Return the translation and contexts
106
+ return translation, context_results
107
 
108
  # Gradio interface
109
  def gradio_interface(input_text):
110
+ translation, contexts = process_request(input_text)
111
  # Format the output
112
+ output = f"Translation: {translation}\n\nDetected Contexts (score >= 0.022):\n"
113
+ for context, score in contexts:
114
  output += f"- {context} (confidence: {score:.4f})\n"
 
 
 
115
  return output.strip()
116
 
117
  # Create the Gradio interface