Frenchizer committed on
Commit
3ee1970
·
verified ·
1 Parent(s): 43942b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -127
app.py CHANGED
@@ -3,9 +3,11 @@ from transformers import pipeline
3
  import spacy
4
  from gradio_client import Client
5
  import re
 
6
  # Initialize models
7
  nlp = spacy.load("en_core_web_sm")
8
  spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
 
9
  def preprocess_capitalization(text: str) -> str:
10
  """Preprocess input text to handle capitalization rules."""
11
  words = text.split(" ")
@@ -22,6 +24,7 @@ def preprocess_capitalization(text: str) -> str:
22
  processed_words.append(word) # Leave other words unchanged
23
 
24
  return " ".join(processed_words)
 
25
  def preprocess_text(text: str):
26
  """Process text and return corrections with position information."""
27
  result = {
@@ -30,6 +33,7 @@ def preprocess_text(text: str):
30
  "entities": [],
31
  "tags": []
32
  }
 
33
  # Apply capitalization preprocessing
34
  capitalized_text = preprocess_capitalization(text)
35
  if capitalized_text != text:
@@ -39,6 +43,7 @@ def preprocess_text(text: str):
39
  "type": "spell"
40
  })
41
  text = capitalized_text # Update text for further processing
 
42
  # Transformer spell check - only for words that look misspelled
43
  spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
44
  if spell_checked != text:
@@ -53,6 +58,7 @@ def preprocess_text(text: str):
53
  "corrected": corrected,
54
  "type": "spell"
55
  })
 
56
  # Add fluency/style suggestions (other suggestions)
57
  # Only add if the word isn't already in spell suggestions
58
  spell_originals = {s["original"] for s in result["spell_suggestions"]}
@@ -70,11 +76,14 @@ def preprocess_text(text: str):
70
  "corrected": word + "!",
71
  "type": "other"
72
  })
 
73
  # Add entities and tags
74
  doc = nlp(text)
75
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
76
  result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
 
77
  return text, result
 
78
  def translate_text(text: str):
79
  """Just translate the text without preprocessing."""
80
  client = Client("Frenchizer/space_21")
@@ -90,6 +99,7 @@ def translate_text(text: str):
90
  return translation, result
91
  except Exception as e:
92
  return f"Error: {str(e)}", {}
 
93
  def preprocess_and_forward(text: str):
94
  """Process text and forward to translation service."""
95
  original_text, preprocessing_result = preprocess_text(text)
@@ -101,139 +111,24 @@ def preprocess_and_forward(text: str):
101
  return translation, preprocessing_result
102
  except Exception as e:
103
  return f"Error: {str(e)}", preprocessing_result
 
104
  def translate_only(text: str):
105
  """Endpoint that only does translation without preprocessing or suggestions."""
106
  translation, empty_result = translate_text(text)
107
  return translation, empty_result
108
 
109
- # Function to format and display suggestions
110
- def format_suggestions(preprocessing_result):
111
- if not preprocessing_result:
112
- return "No suggestions available."
113
-
114
- formatted_output = []
115
-
116
- # Format spell suggestions
117
- if preprocessing_result["spell_suggestions"]:
118
- formatted_output.append("## Spelling Suggestions")
119
- for suggestion in preprocessing_result["spell_suggestions"]:
120
- formatted_output.append(f"• '{suggestion['original']}' → '{suggestion['corrected']}'")
121
-
122
- # Format other suggestions
123
- if preprocessing_result["other_suggestions"]:
124
- formatted_output.append("## Style Suggestions")
125
- for suggestion in preprocessing_result["other_suggestions"]:
126
- formatted_output.append(f"• '{suggestion['original']}' → '{suggestion['corrected']}'")
127
-
128
- # Format entities
129
- if preprocessing_result["entities"]:
130
- formatted_output.append("## Detected Entities")
131
- for entity in preprocessing_result["entities"]:
132
- formatted_output.append(f"• {entity['text']} ({entity['label']})")
133
-
134
- # Format tags
135
- if preprocessing_result["tags"]:
136
- formatted_output.append("## Detected Tags")
137
- for tag in preprocessing_result["tags"]:
138
- formatted_output.append(f"• {tag}")
139
-
140
- if not formatted_output:
141
- return "No suggestions available."
142
-
143
- return "\n".join(formatted_output)
144
-
145
  # Gradio interface
146
- with gr.Blocks(title="Text Processor & Translator") as demo:
147
- gr.Markdown("# Text Processing and Translation Tool")
148
- gr.Markdown("Process, correct, and translate your text with advanced NLP features.")
149
-
150
- with gr.Tab("Full Processing"):
151
- with gr.Row():
152
- with gr.Column():
153
- input_text_full = gr.Textbox(label="Input Text", placeholder="Enter text to process and translate...", lines=5)
154
- process_translate_button = gr.Button("Process & Translate", variant="primary")
155
-
156
- with gr.Column():
157
- output_text_full = gr.Textbox(label="Translated Text", lines=5)
158
- suggestions_markdown = gr.Markdown(label="Suggestions")
159
-
160
- # Connect the full processing pipeline
161
- def process_and_display(text):
162
- translation, preprocessing_result = preprocess_and_forward(text)
163
- suggestions = format_suggestions(preprocessing_result)
164
- return translation, suggestions
165
-
166
- process_translate_button.click(
167
- fn=process_and_display,
168
- inputs=[input_text_full],
169
- outputs=[output_text_full, suggestions_markdown]
170
- )
171
-
172
  with gr.Tab("Translation Only"):
173
- with gr.Row():
174
- with gr.Column():
175
- input_text_translate = gr.Textbox(label="Input Text", placeholder="Enter text to translate only...", lines=5)
176
- translate_button = gr.Button("Translate", variant="primary")
177
-
178
- with gr.Column():
179
- output_text_translate = gr.Textbox(label="Translated Text", lines=5)
180
-
181
- # Connect translation-only pipeline
182
- translate_button.click(
183
- fn=translate_only,
184
- inputs=[input_text_translate],
185
- outputs=[output_text_translate]
186
- )
187
-
188
- with gr.Tab("Text Analysis Only"):
189
- with gr.Row():
190
- with gr.Column():
191
- input_text_analysis = gr.Textbox(label="Input Text", placeholder="Enter text to analyze...", lines=5)
192
- analyze_button = gr.Button("Analyze Text", variant="primary")
193
-
194
- with gr.Column():
195
- analysis_markdown = gr.Markdown(label="Analysis Results")
196
-
197
- # Connect analysis-only pipeline
198
- def analyze_only(text):
199
- _, preprocessing_result = preprocess_text(text)
200
- suggestions = format_suggestions(preprocessing_result)
201
- return suggestions
202
-
203
- analyze_button.click(
204
- fn=analyze_only,
205
- inputs=[input_text_analysis],
206
- outputs=[analysis_markdown]
207
- )
208
-
209
- with gr.Tab("Help"):
210
- gr.Markdown("""
211
- # How to Use This Tool
212
-
213
- This application provides three main functionalities:
214
-
215
- ## 1. Full Processing
216
- - Corrects spelling and capitalization errors
217
- - Identifies named entities and tags
218
- - Offers style suggestions
219
- - Translates the processed text to French
220
-
221
- ## 2. Translation Only
222
- - Directly translates your text to French without any preprocessing
223
- - Useful when you just need a quick translation
224
-
225
- ## 3. Text Analysis Only
226
- - Analyzes your text for errors and improvement opportunities
227
- - Identifies named entities and tags
228
- - Doesn't perform any translation
229
-
230
- ## Features
231
- - **Spelling Correction**: Identifies and corrects spelling errors
232
- - **Capitalization Fixes**: Corrects improper capitalization while preserving acronyms
233
- - **Style Suggestions**: Offers improvement suggestions for better writing
234
- - **Entity Recognition**: Identifies people, organizations, locations, etc.
235
- - **Tag Detection**: Finds hashtags and mentions in your text
236
- """)
237
-
238
  if __name__ == "__main__":
239
  demo.launch()
 
3
  import spacy
4
  from gradio_client import Client
5
  import re
6
+
7
  # Initialize models
8
  nlp = spacy.load("en_core_web_sm")
9
  spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
10
+
11
  def preprocess_capitalization(text: str) -> str:
12
  """Preprocess input text to handle capitalization rules."""
13
  words = text.split(" ")
 
24
  processed_words.append(word) # Leave other words unchanged
25
 
26
  return " ".join(processed_words)
27
+
28
  def preprocess_text(text: str):
29
  """Process text and return corrections with position information."""
30
  result = {
 
33
  "entities": [],
34
  "tags": []
35
  }
36
+
37
  # Apply capitalization preprocessing
38
  capitalized_text = preprocess_capitalization(text)
39
  if capitalized_text != text:
 
43
  "type": "spell"
44
  })
45
  text = capitalized_text # Update text for further processing
46
+
47
  # Transformer spell check - only for words that look misspelled
48
  spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
49
  if spell_checked != text:
 
58
  "corrected": corrected,
59
  "type": "spell"
60
  })
61
+
62
  # Add fluency/style suggestions (other suggestions)
63
  # Only add if the word isn't already in spell suggestions
64
  spell_originals = {s["original"] for s in result["spell_suggestions"]}
 
76
  "corrected": word + "!",
77
  "type": "other"
78
  })
79
+
80
  # Add entities and tags
81
  doc = nlp(text)
82
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
83
  result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
84
+
85
  return text, result
86
+
87
  def translate_text(text: str):
88
  """Just translate the text without preprocessing."""
89
  client = Client("Frenchizer/space_21")
 
99
  return translation, result
100
  except Exception as e:
101
  return f"Error: {str(e)}", {}
102
+
103
  def preprocess_and_forward(text: str):
104
  """Process text and forward to translation service."""
105
  original_text, preprocessing_result = preprocess_text(text)
 
111
  return translation, preprocessing_result
112
  except Exception as e:
113
  return f"Error: {str(e)}", preprocessing_result
114
+
115
  def translate_only(text: str):
116
  """Endpoint that only does translation without preprocessing or suggestions."""
117
  translation, empty_result = translate_text(text)
118
  return translation, empty_result
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  # Gradio interface
121
+ with gr.Blocks() as demo:
122
+ with gr.Tab("Main"):
123
+ input_text = gr.Textbox(label="Input Text")
124
+ output_text = gr.Textbox(label="Output Text")
125
+ preprocess_button = gr.Button("Process and Translate")
126
+ preprocess_button.click(fn=preprocess_and_forward, inputs=input_text, outputs=output_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  with gr.Tab("Translation Only"):
128
+ translate_input = gr.Textbox(label="Input Text")
129
+ translate_output = gr.Textbox(label="Output Text")
130
+ translate_button = gr.Button("Translate")
131
+ translate_button.click(fn=translate_only, inputs=translate_input, outputs=translate_output)
132
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  if __name__ == "__main__":
134
  demo.launch()