Frenchizer committed on
Commit
abdc326
·
verified ·
1 Parent(s): 5f5871c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -30
app.py CHANGED
@@ -26,61 +26,57 @@ def preprocess_capitalization(text: str) -> str:
26
 
27
  return " ".join(processed_words)
28
 
29
def find_differences(original: str, corrected: str):
    """Return the character-level differences between two strings.

    The strings are compared position by position. When they differ in
    length, the positions past the end of the shorter string are reported
    too, with ``""`` standing in for the missing character, so inserted or
    dropped trailing characters are not silently ignored.

    Args:
        original: The text before correction.
        corrected: The text after correction.

    Returns:
        A list of ``{"position", "original", "corrected"}`` dicts, one per
        differing position, in ascending position order. Empty when the
        strings are identical.
    """
    # Local import: the file's top-level import block is not visible here.
    from itertools import zip_longest

    # zip() would stop at the shorter string; zip_longest pads it instead.
    paired = zip_longest(original, corrected, fillvalue="")
    return [
        {"position": index, "original": before, "corrected": after}
        for index, (before, after) in enumerate(paired)
        if before != after
    ]
40
-
41
  def preprocess_text(text: str):
42
  """Process text and return corrections with position information."""
43
  result = {
44
- "corrected_text": "",
45
- "differences": [],
46
- "entities": []
47
  }
48
 
49
  # Apply capitalization preprocessing
50
  capitalized_text = preprocess_capitalization(text)
51
  if capitalized_text != text:
52
- result["corrected_text"] = capitalized_text
53
- result["differences"] = find_differences(text, capitalized_text)
 
 
54
  text = capitalized_text # Update text for further processing
55
 
56
- # TextBlob spell check
 
 
 
57
  blob = TextBlob(text)
58
  corrected = str(blob.correct())
59
  if corrected != text:
60
- result["corrected_text"] = corrected
61
- result["differences"] = find_differences(text, corrected)
62
- text = corrected # Update text for further processing
63
-
 
64
  # Transformer spell check
65
  spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
66
  if spell_checked != text and spell_checked != corrected:
67
- result["corrected_text"] = spell_checked
68
- result["differences"] = find_differences(text, spell_checked)
 
 
69
 
70
- # Add entities
71
- doc = nlp(text)
72
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
 
73
 
74
- return result
75
 
76
  def preprocess_and_forward(text: str):
77
  """Process text and forward to translation service."""
78
- preprocessing_result = preprocess_text(text)
79
 
80
- # Forward corrected text to translation service
81
  client = Client("Frenchizer/space_17")
82
  try:
83
- translation = client.predict(preprocessing_result["corrected_text"])
84
  return translation, preprocessing_result
85
  except Exception as e:
86
  return f"Error: {str(e)}", preprocessing_result
 
26
 
27
  return " ".join(processed_words)
28
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def preprocess_text(text: str):
30
  """Process text and return corrections with position information."""
31
  result = {
32
+ "spell_suggestions": [],
33
+ "entities": [],
34
+ "tags": []
35
  }
36
 
37
  # Apply capitalization preprocessing
38
  capitalized_text = preprocess_capitalization(text)
39
  if capitalized_text != text:
40
+ result["spell_suggestions"].append({
41
+ "original": text,
42
+ "corrected": capitalized_text
43
+ })
44
  text = capitalized_text # Update text for further processing
45
 
46
+ # Find and record positions of corrections
47
+ doc = nlp(text)
48
+
49
+ # TextBlob spell check with position tracking
50
  blob = TextBlob(text)
51
  corrected = str(blob.correct())
52
  if corrected != text:
53
+ result["spell_suggestions"].append({
54
+ "original": text,
55
+ "corrected": corrected
56
+ })
57
+
58
  # Transformer spell check
59
  spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
60
  if spell_checked != text and spell_checked != corrected:
61
+ result["spell_suggestions"].append({
62
+ "original": text,
63
+ "corrected": spell_checked
64
+ })
65
 
66
+ # Add entities and tags
 
67
  result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
68
+ result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
69
 
70
+ return text, result
71
 
72
  def preprocess_and_forward(text: str):
73
  """Process text and forward to translation service."""
74
+ original_text, preprocessing_result = preprocess_text(text)
75
 
76
+ # Forward original text to translation service
77
  client = Client("Frenchizer/space_17")
78
  try:
79
+ translation = client.predict(original_text)
80
  return translation, preprocessing_result
81
  except Exception as e:
82
  return f"Error: {str(e)}", preprocessing_result