Frenchizer committed on
Commit
5f5871c
·
verified ·
1 Parent(s): 4524238

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -26
app.py CHANGED
@@ -26,57 +26,61 @@ def preprocess_capitalization(text: str) -> str:
26
 
27
  return " ".join(processed_words)
28
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def preprocess_text(text: str):
30
  """Process text and return corrections with position information."""
31
  result = {
32
- "spell_suggestions": [],
33
- "entities": [],
34
- "tags": []
35
  }
36
 
37
  # Apply capitalization preprocessing
38
  capitalized_text = preprocess_capitalization(text)
39
  if capitalized_text != text:
40
- result["spell_suggestions"].append({
41
- "original": text,
42
- "corrected": capitalized_text
43
- })
44
  text = capitalized_text # Update text for further processing
45
 
46
- # Find and record positions of corrections
47
- doc = nlp(text)
48
-
49
- # TextBlob spell check with position tracking
50
  blob = TextBlob(text)
51
  corrected = str(blob.correct())
52
  if corrected != text:
53
- result["spell_suggestions"].append({
54
- "original": text,
55
- "corrected": corrected
56
- })
57
-
58
  # Transformer spell check
59
  spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
60
  if spell_checked != text and spell_checked != corrected:
61
- result["spell_suggestions"].append({
62
- "original": text,
63
- "corrected": spell_checked
64
- })
65
 
66
- # Add entities and tags
 
67
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
68
- result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
69
 
70
- return text, result
71
 
72
  def preprocess_and_forward(text: str):
73
  """Process text and forward to translation service."""
74
- original_text, preprocessing_result = preprocess_text(text)
75
 
76
- # Forward original text to translation service
77
  client = Client("Frenchizer/space_17")
78
  try:
79
- translation = client.predict(original_text)
80
  return translation, preprocessing_result
81
  except Exception as e:
82
  return f"Error: {str(e)}", preprocessing_result
 
26
 
27
  return " ".join(processed_words)
28
 
29
+ def find_differences(original: str, corrected: str):
30
+ """Find differences between original and corrected text."""
31
+ differences = []
32
+ for i, (orig_char, corr_char) in enumerate(zip(original, corrected)):
33
+ if orig_char != corr_char:
34
+ differences.append({
35
+ "position": i,
36
+ "original": orig_char,
37
+ "corrected": corr_char
38
+ })
39
+ return differences
40
+
41
  def preprocess_text(text: str):
42
  """Process text and return corrections with position information."""
43
  result = {
44
+ "corrected_text": "",
45
+ "differences": [],
46
+ "entities": []
47
  }
48
 
49
  # Apply capitalization preprocessing
50
  capitalized_text = preprocess_capitalization(text)
51
  if capitalized_text != text:
52
+ result["corrected_text"] = capitalized_text
53
+ result["differences"] = find_differences(text, capitalized_text)
 
 
54
  text = capitalized_text # Update text for further processing
55
 
56
+ # TextBlob spell check
 
 
 
57
  blob = TextBlob(text)
58
  corrected = str(blob.correct())
59
  if corrected != text:
60
+ result["corrected_text"] = corrected
61
+ result["differences"] = find_differences(text, corrected)
62
+ text = corrected # Update text for further processing
63
+
 
64
  # Transformer spell check
65
  spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
66
  if spell_checked != text and spell_checked != corrected:
67
+ result["corrected_text"] = spell_checked
68
+ result["differences"] = find_differences(text, spell_checked)
 
 
69
 
70
+ # Add entities
71
+ doc = nlp(text)
72
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
 
73
 
74
+ return result
75
 
76
  def preprocess_and_forward(text: str):
77
  """Process text and forward to translation service."""
78
+ preprocessing_result = preprocess_text(text)
79
 
80
+ # Forward corrected text to translation service
81
  client = Client("Frenchizer/space_17")
82
  try:
83
+ translation = client.predict(preprocessing_result["corrected_text"])
84
  return translation, preprocessing_result
85
  except Exception as e:
86
  return f"Error: {str(e)}", preprocessing_result