Frenchizer committed on
Commit
34b88ee
·
verified ·
1 Parent(s): 5279dfc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -45
app.py CHANGED
@@ -2,65 +2,55 @@ import gradio as gr
2
  from transformers import pipeline
3
  import spacy
4
  from textblob import TextBlob
5
- import requests
6
  from gradio_client import Client
7
 
8
  # Initialize models
9
- nlp = spacy.load("en_core_web_sm") # More accurate NER
10
  spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
11
 
12
- def preprocess_and_forward(text: str) -> str:
13
- """
14
- Processes the input text and forwards it to the Gradio client for space_17.
15
- Returns only the final translated text.
16
- """
17
- processed_text, _ = preprocess_text(text)
18
- return forward_to_translation(processed_text)
19
-
20
  def preprocess_text(text: str):
21
  """
22
  Applies spell-checking and named entity recognition (NER) to preprocess text.
 
23
  """
24
- result = {
25
- "spell_suggestions": [],
26
- "entities": [],
27
- "tags": []
28
- }
29
-
30
- # Basic spell checking using TextBlob
31
- corrected_text = str(TextBlob(text).correct())
32
- if corrected_text != text:
33
- result["spell_suggestions"].append({
34
- "original": text,
35
- "corrected": corrected_text
36
- })
37
-
38
- # Transformer-based spell check
39
- spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
40
- if spell_checked != text and spell_checked != corrected_text:
41
- result["spell_suggestions"].append({
42
- "original": text,
43
- "corrected": spell_checked
44
- })
45
-
46
- # NER with spaCy
47
  doc = nlp(text)
48
- result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
 
 
49
 
50
- # Extract potential tags (hashtags, mentions, etc.)
51
- result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
52
-
53
- # Choose the best-corrected version
54
- final_text = spell_checked if spell_checked != text else corrected_text
55
-
56
- return final_text, result
 
57
 
58
  def forward_to_translation(text: str) -> str:
59
  """
60
- Sends preprocessed text to space_17 for translation and returns only the translated text.
61
  """
62
  client = Client("Frenchizer/space_17")
63
-
64
  try:
65
  return client.predict(text)
66
  except Exception as e:
@@ -69,9 +59,11 @@ def forward_to_translation(text: str) -> str:
69
  # Gradio interface
70
  with gr.Blocks() as demo:
71
  input_text = gr.Textbox(label="Input Text")
72
- output_text = gr.Textbox(label="Output Text") # Returns only text
 
 
73
  preprocess_button = gr.Button("Process")
74
- preprocess_button.click(fn=preprocess_and_forward, inputs=[input_text], outputs=[output_text])
75
 
76
  if __name__ == "__main__":
77
  demo.launch()
 
2
  from transformers import pipeline
3
  import spacy
4
  from textblob import TextBlob
 
5
  from gradio_client import Client
6
 
7
  # Initialize models
8
+ nlp = spacy.load("en_core_web_sm") # NER model
9
  spell_checker = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
10
 
 
 
 
 
 
 
 
 
11
  def preprocess_text(text: str):
12
  """
13
  Applies spell-checking and named entity recognition (NER) to preprocess text.
14
+ Returns token-level suggestions.
15
  """
16
+ tokens = text.split()
17
+ suggestions = []
18
+
19
+ for token in tokens:
20
+ token_suggestions = {"original": token, "suggestions": []}
21
+
22
+ # Basic spell checking
23
+ corrected = str(TextBlob(token).correct())
24
+ if corrected != token:
25
+ token_suggestions["suggestions"].append(corrected)
26
+
27
+ # Transformer-based spell checking
28
+ spell_checked = spell_checker(token, max_length=20)[0]['generated_text']
29
+ if spell_checked != token and spell_checked not in token_suggestions["suggestions"]:
30
+ token_suggestions["suggestions"].append(spell_checked)
31
+
32
+ suggestions.append(token_suggestions)
33
+
34
+ # Named Entity Recognition (NER)
 
 
 
 
35
  doc = nlp(text)
36
+ entities = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
37
+
38
+ return {"tokens": suggestions, "entities": entities}
39
 
40
+ def preprocess_and_forward(text: str):
41
+ """
42
+ Processes the input text, returns suggestions, and forwards the cleaned version for translation.
43
+ """
44
+ processed_data = preprocess_text(text)
45
+ final_text = " ".join([t['suggestions'][0] if t['suggestions'] else t['original'] for t in processed_data["tokens"]])
46
+ translation = forward_to_translation(final_text)
47
+ return {"suggestions": processed_data, "translation": translation}
48
 
49
  def forward_to_translation(text: str) -> str:
50
  """
51
+ Sends preprocessed text for translation and returns only the translated text.
52
  """
53
  client = Client("Frenchizer/space_17")
 
54
  try:
55
  return client.predict(text)
56
  except Exception as e:
 
59
  # Gradio interface
60
  with gr.Blocks() as demo:
61
  input_text = gr.Textbox(label="Input Text")
62
+ output_text = gr.Textbox(label="Translated Text")
63
+ suggestion_output = gr.JSON(label="Suggestions")
64
+
65
  preprocess_button = gr.Button("Process")
66
+ preprocess_button.click(fn=preprocess_and_forward, inputs=[input_text], outputs=[suggestion_output, output_text])
67
 
68
  if __name__ == "__main__":
69
  demo.launch()