Frenchizer committed on
Commit
7d8dd36
·
verified ·
1 Parent(s): 616a0d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -23
app.py CHANGED
@@ -14,8 +14,10 @@ def preprocess_capitalization(text: str) -> str:
14
  processed_words = []
15
 
16
  for word in words:
 
17
  if re.match(r"^[A-Z]+$", word):
18
  processed_words.append(word) # Leave acronyms unchanged
 
19
  elif re.search(r"[A-Z]", word) and re.search(r"[a-z]", word):
20
  processed_words.append(word[0].upper() + word[1:].lower()) # Correct capitalization
21
  else:
@@ -23,7 +25,7 @@ def preprocess_capitalization(text: str) -> str:
23
 
24
  return " ".join(processed_words)
25
 
26
- def preprocess_text(text: str, is_spell_corrected: bool = False):
27
  """Process text and return corrections with position information."""
28
  result = {
29
  "spell_suggestions": [],
@@ -42,22 +44,20 @@ def preprocess_text(text: str, is_spell_corrected: bool = False):
42
  })
43
  text = capitalized_text # Update text for further processing
44
 
45
- # Transformer spell check - only for words that look misspelled, skipped if spell corrected
46
- if not is_spell_corrected:
47
- spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
48
- if spell_checked != text:
49
- spell_words = spell_checked.split()
50
- text_words = text.split()
51
-
52
- # Only suggest spell corrections for words that are actually different
53
- for orig, corrected in zip(text_words, spell_words):
54
- if orig.lower() != corrected.lower(): # Compare case-insensitive
55
- result["spell_suggestions"].append({
56
- "original": orig,
57
- "corrected": corrected,
58
- "type": "spell"
59
- })
60
- text = spell_checked # Update text after spell correction
61
 
62
  # Add fluency/style suggestions (other suggestions)
63
  # Only add if the word isn't already in spell suggestions
@@ -69,8 +69,8 @@ def preprocess_text(text: str, is_spell_corrected: bool = False):
69
  if word in spell_originals or word.endswith(('.', '?', '!')):
70
  continue
71
 
72
- # Add style suggestion (like adding punctuation)
73
- if not word.endswith('!'):
74
  result["other_suggestions"].append({
75
  "original": word,
76
  "corrected": word + "!",
@@ -89,6 +89,7 @@ def translate_text(text: str):
89
  client = Client("Frenchizer/space_21")
90
  try:
91
  translation = client.predict(text)
 
92
  result = {
93
  "spell_suggestions": [],
94
  "other_suggestions": [],
@@ -99,10 +100,11 @@ def translate_text(text: str):
99
  except Exception as e:
100
  return f"Error: {str(e)}", {}
101
 
102
- def preprocess_and_forward(text: str, is_spell_corrected: bool = False):
103
  """Process text and forward to translation service."""
104
- original_text, preprocessing_result = preprocess_text(text, is_spell_corrected)
105
 
 
106
  client = Client("Frenchizer/space_21")
107
  try:
108
  translation = client.predict(original_text)
@@ -119,10 +121,9 @@ def translate_only(text: str):
119
  with gr.Blocks() as demo:
120
  with gr.Tab("Main"):
121
  input_text = gr.Textbox(label="Input Text")
122
- is_spell_corrected = gr.Checkbox(label="Spell Corrected", value=False, visible=False) # Hidden flag
123
  output_text = gr.Textbox(label="Output Text")
124
  preprocess_button = gr.Button("Process and Translate")
125
- preprocess_button.click(fn=preprocess_and_forward, inputs=[input_text, is_spell_corrected], outputs=output_text)
126
  with gr.Tab("Translation Only"):
127
  translate_input = gr.Textbox(label="Input Text")
128
  translate_output = gr.Textbox(label="Output Text")
 
14
  processed_words = []
15
 
16
  for word in words:
17
+ # Check if the word is an acronym (all uppercase letters)
18
  if re.match(r"^[A-Z]+$", word):
19
  processed_words.append(word) # Leave acronyms unchanged
20
+ # Check if the word has mixed capitalization (e.g., "HEllo")
21
  elif re.search(r"[A-Z]", word) and re.search(r"[a-z]", word):
22
  processed_words.append(word[0].upper() + word[1:].lower()) # Correct capitalization
23
  else:
 
25
 
26
  return " ".join(processed_words)
27
 
28
+ def preprocess_text(text: str):
29
  """Process text and return corrections with position information."""
30
  result = {
31
  "spell_suggestions": [],
 
44
  })
45
  text = capitalized_text # Update text for further processing
46
 
47
+ # Transformer spell check - only for words that look misspelled
48
+ spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
49
+ if spell_checked != text:
50
+ spell_words = spell_checked.split()
51
+ text_words = text.split()
52
+
53
+ # Only suggest spell corrections for words that are actually different
54
+ for orig, corrected in zip(text_words, spell_words):
55
+ if orig.lower() != corrected.lower(): # Compare case-insensitive
56
+ result["spell_suggestions"].append({
57
+ "original": orig,
58
+ "corrected": corrected,
59
+ "type": "spell"
60
+ })
 
 
61
 
62
  # Add fluency/style suggestions (other suggestions)
63
  # Only add if the word isn't already in spell suggestions
 
69
  if word in spell_originals or word.endswith(('.', '?', '!')):
70
  continue
71
 
72
+ # Add style suggestions (like adding punctuation)
73
+ if not word.endswith('!'): # Only suggest adding exclamation if it doesn't already have one
74
  result["other_suggestions"].append({
75
  "original": word,
76
  "corrected": word + "!",
 
89
  client = Client("Frenchizer/space_21")
90
  try:
91
  translation = client.predict(text)
92
+ # Return an empty suggestions object since we're just translating
93
  result = {
94
  "spell_suggestions": [],
95
  "other_suggestions": [],
 
100
  except Exception as e:
101
  return f"Error: {str(e)}", {}
102
 
103
+ def preprocess_and_forward(text: str):
104
  """Process text and forward to translation service."""
105
+ original_text, preprocessing_result = preprocess_text(text)
106
 
107
+ # Forward original text to translation service
108
  client = Client("Frenchizer/space_21")
109
  try:
110
  translation = client.predict(original_text)
 
121
  with gr.Blocks() as demo:
122
  with gr.Tab("Main"):
123
  input_text = gr.Textbox(label="Input Text")
 
124
  output_text = gr.Textbox(label="Output Text")
125
  preprocess_button = gr.Button("Process and Translate")
126
+ preprocess_button.click(fn=preprocess_and_forward, inputs=input_text, outputs=output_text)
127
  with gr.Tab("Translation Only"):
128
  translate_input = gr.Textbox(label="Input Text")
129
  translate_output = gr.Textbox(label="Output Text")