Frenchizer committed on
Commit
708dbc3
·
verified ·
1 Parent(s): 7d8dd36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -67
app.py CHANGED
@@ -14,10 +14,8 @@ def preprocess_capitalization(text: str) -> str:
14
  processed_words = []
15
 
16
  for word in words:
17
- # Check if the word is an acronym (all uppercase letters)
18
  if re.match(r"^[A-Z]+$", word):
19
  processed_words.append(word) # Leave acronyms unchanged
20
- # Check if the word has mixed capitalization (e.g., "HEllo")
21
  elif re.search(r"[A-Z]", word) and re.search(r"[a-z]", word):
22
  processed_words.append(word[0].upper() + word[1:].lower()) # Correct capitalization
23
  else:
@@ -25,11 +23,11 @@ def preprocess_capitalization(text: str) -> str:
25
 
26
  return " ".join(processed_words)
27
 
28
- def preprocess_text(text: str):
29
  """Process text and return corrections with position information."""
30
  result = {
31
  "spell_suggestions": [],
32
- "other_suggestions": [],
33
  "entities": [],
34
  "tags": []
35
  }
@@ -39,42 +37,27 @@ def preprocess_text(text: str):
39
  if capitalized_text != text:
40
  result["spell_suggestions"].append({
41
  "original": text,
42
- "corrected": capitalized_text,
43
- "type": "spell"
44
  })
45
  text = capitalized_text # Update text for further processing
46
 
47
- # Transformer spell check - only for words that look misspelled
48
- spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
49
- if spell_checked != text:
50
- spell_words = spell_checked.split()
51
- text_words = text.split()
52
-
53
- # Only suggest spell corrections for words that are actually different
54
- for orig, corrected in zip(text_words, spell_words):
55
- if orig.lower() != corrected.lower(): # Compare case-insensitive
56
- result["spell_suggestions"].append({
57
- "original": orig,
58
- "corrected": corrected,
59
- "type": "spell"
60
- })
61
 
62
- # Add fluency/style suggestions (other suggestions)
63
- # Only add if the word isn't already in spell suggestions
64
- spell_originals = {s["original"] for s in result["spell_suggestions"]}
65
  words = text.split()
66
-
67
  for word in words:
68
- # Skip words that have spell suggestions or already have punctuation
69
- if word in spell_originals or word.endswith(('.', '?', '!')):
70
- continue
71
-
72
- # Add style suggestions (like adding punctuation)
73
- if not word.endswith('!'): # Only suggest adding exclamation if it doesn't already have one
74
  result["other_suggestions"].append({
75
  "original": word,
76
- "corrected": word + "!",
77
- "type": "other"
78
  })
79
 
80
  # Add entities and tags
@@ -84,51 +67,25 @@ def preprocess_text(text: str):
84
 
85
  return text, result
86
 
87
- def translate_text(text: str):
88
- """Just translate the text without preprocessing."""
89
- client = Client("Frenchizer/space_21")
90
- try:
91
- translation = client.predict(text)
92
- # Return an empty suggestions object since we're just translating
93
- result = {
94
- "spell_suggestions": [],
95
- "other_suggestions": [],
96
- "entities": [],
97
- "tags": []
98
- }
99
- return translation, result
100
- except Exception as e:
101
- return f"Error: {str(e)}", {}
102
-
103
- def preprocess_and_forward(text: str):
104
  """Process text and forward to translation service."""
105
- original_text, preprocessing_result = preprocess_text(text)
106
 
107
  # Forward the preprocessed text to the translation service
108
- client = Client("Frenchizer/space_21")
109
  try:
110
  translation = client.predict(original_text)
111
  return translation, preprocessing_result
112
  except Exception as e:
113
  return f"Error: {str(e)}", preprocessing_result
114
 
115
- def translate_only(text: str):
116
- """Endpoint that only does translation without preprocessing or suggestions."""
117
- translation, empty_result = translate_text(text)
118
- return translation, empty_result
119
-
120
  # Gradio interface
121
  with gr.Blocks() as demo:
122
- with gr.Tab("Main"):
123
- input_text = gr.Textbox(label="Input Text")
124
- output_text = gr.Textbox(label="Output Text")
125
- preprocess_button = gr.Button("Process and Translate")
126
- preprocess_button.click(fn=preprocess_and_forward, inputs=input_text, outputs=output_text)
127
- with gr.Tab("Translation Only"):
128
- translate_input = gr.Textbox(label="Input Text")
129
- translate_output = gr.Textbox(label="Output Text")
130
- translate_button = gr.Button("Translate")
131
- translate_button.click(fn=translate_only, inputs=translate_input, outputs=translate_output)
132
-
133
  if __name__ == "__main__":
134
  demo.launch()
 
14
  processed_words = []
15
 
16
  for word in words:
 
17
  if re.match(r"^[A-Z]+$", word):
18
  processed_words.append(word) # Leave acronyms unchanged
 
19
  elif re.search(r"[A-Z]", word) and re.search(r"[a-z]", word):
20
  processed_words.append(word[0].upper() + word[1:].lower()) # Correct capitalization
21
  else:
 
23
 
24
  return " ".join(processed_words)
25
 
26
+ def preprocess_text(text: str, is_spell_corrected: bool = False):
27
  """Process text and return corrections with position information."""
28
  result = {
29
  "spell_suggestions": [],
30
+ "other_suggestions": [], # Added to distinguish style suggestions
31
  "entities": [],
32
  "tags": []
33
  }
 
37
  if capitalized_text != text:
38
  result["spell_suggestions"].append({
39
  "original": text,
40
+ "corrected": capitalized_text
 
41
  })
42
  text = capitalized_text # Update text for further processing
43
 
44
+ # Transformer spell check, skipped if already spell-corrected
45
+ if not is_spell_corrected:
46
+ spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
47
+ if spell_checked != text:
48
+ result["spell_suggestions"].append({
49
+ "original": text,
50
+ "corrected": spell_checked
51
+ })
52
+ text = spell_checked # Update text after spell correction
 
 
 
 
 
53
 
54
+ # Add style suggestions (other_suggestions)
 
 
55
  words = text.split()
 
56
  for word in words:
57
+ if not word.endswith(('.', '?', '!')): # Skip if already punctuated
 
 
 
 
 
58
  result["other_suggestions"].append({
59
  "original": word,
60
+ "corrected": word + "!"
 
61
  })
62
 
63
  # Add entities and tags
 
67
 
68
  return text, result
69
 
70
+ def preprocess_and_forward(text: str, is_spell_corrected: bool = False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  """Process text and forward to translation service."""
72
+ original_text, preprocessing_result = preprocess_text(text, is_spell_corrected)
73
 
74
  # Forward the preprocessed text to the translation service
75
+ client = Client("Frenchizer/space_17")
76
  try:
77
  translation = client.predict(original_text)
78
  return translation, preprocessing_result
79
  except Exception as e:
80
  return f"Error: {str(e)}", preprocessing_result
81
 
 
 
 
 
 
82
  # Gradio interface
83
  with gr.Blocks() as demo:
84
+ input_text = gr.Textbox(label="Input Text")
85
+ is_spell_corrected = gr.Checkbox(label="Spell Corrected", value=False, visible=False) # Hidden flag
86
+ output_text = gr.Textbox(label="Output Text")
87
+ preprocess_button = gr.Button("Process")
88
+ preprocess_button.click(fn=preprocess_and_forward, inputs=[input_text, is_spell_corrected], outputs=[output_text])
89
+
 
 
 
 
 
90
  if __name__ == "__main__":
91
  demo.launch()