space_22

Sleeping

App Files Files Community

Frenchizer committed on Feb 15, 2025

Commit

5f5871c

verified ·

1 Parent(s): 4524238

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -26

app.py CHANGED Viewed

@@ -26,57 +26,61 @@ def preprocess_capitalization(text: str) -> str:
     return " ".join(processed_words)
 def preprocess_text(text: str):
     """Process text and return corrections with position information."""
     result = {
-        "spell_suggestions": [],
-        "entities": [],
-        "tags": []
     }
     # Apply capitalization preprocessing
     capitalized_text = preprocess_capitalization(text)
     if capitalized_text != text:
-        result["spell_suggestions"].append({
-            "original": text,
-            "corrected": capitalized_text
-        })
         text = capitalized_text  # Update text for further processing
-    # Find and record positions of corrections
-    doc = nlp(text)
-    # TextBlob spell check with position tracking
     blob = TextBlob(text)
     corrected = str(blob.correct())
     if corrected != text:
-        result["spell_suggestions"].append({
-            "original": text,
-            "corrected": corrected
-        })
     # Transformer spell check
     spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
     if spell_checked != text and spell_checked != corrected:
-        result["spell_suggestions"].append({
-            "original": text,
-            "corrected": spell_checked
-        })
-    # Add entities and tags
     result["entities"] = [{"text": ent.text, "label": ent.label_} for ent in doc.ents]
-    result["tags"] = [token.text for token in doc if token.text.startswith(('#', '@'))]
-    return text, result
 def preprocess_and_forward(text: str):
     """Process text and forward to translation service."""
-    original_text, preprocessing_result = preprocess_text(text)
-    # Forward original text to translation service
     client = Client("Frenchizer/space_17")
     try:
-        translation = client.predict(original_text)
         return translation, preprocessing_result
     except Exception as e:
         return f"Error: {str(e)}", preprocessing_result

     return " ".join(processed_words)
def find_differences(original: str, corrected: str):
    """Return the character positions at which two strings differ.

    The comparison is purely positional: character ``i`` of *original* is
    compared with character ``i`` of *corrected*. No alignment is attempted,
    so an insertion or deletion in the middle of the text makes every
    following position count as different.

    Args:
        original: Text before correction.
        corrected: Text after correction.

    Returns:
        A list of dicts, one per differing position in ascending order, with
        keys ``"position"`` (0-based index), ``"original"`` and
        ``"corrected"`` (the character on each side). When one string is
        longer than the other, the missing side is reported as ``""`` so
        trailing additions and removals are not silently dropped.
    """
    from itertools import zip_longest

    # zip() would stop at the shorter string and hide any length change;
    # zip_longest pads the shorter side with "" so the tail is reported too.
    paired = zip_longest(original, corrected, fillvalue="")
    return [
        {"position": index, "original": before, "corrected": after}
        for index, (before, after) in enumerate(paired)
        if before != after
    ]
 def preprocess_text(text: str):
     """Run the correction stages over *text* and describe the outcome.

     The input passes through three stages in order, each one working on the
     previous stage's output: ``preprocess_capitalization``, TextBlob's
     ``correct()``, and the ``spell_checker`` pipeline (called with
     ``max_length=512``).

     Args:
         text: Raw user input.

     Returns:
         A dict with three keys:

         * ``"corrected_text"`` - the text after all stages. It equals the
           input when no stage changed anything, so it is never left empty
           for a non-empty input unless a stage itself produced "".
         * ``"differences"`` - ``find_differences(input, corrected_text)``,
           measured against the caller's original input rather than against
           an intermediate stage, so every stage's changes are included.
         * ``"entities"`` - one ``{"text": ..., "label": ...}`` dict per
           entity that ``nlp`` finds in ``corrected_text``.
     """
     original_text = text

     # Stage 1: capitalization preprocessing.
     text = preprocess_capitalization(text)

     # Stage 2: TextBlob spell check on the capitalized text.
     text = str(TextBlob(text).correct())

     # Stage 3: transformer spell check on the TextBlob output.
     text = spell_checker(text, max_length=512)[0]['generated_text']

     # Entities are extracted from the final text so that they describe the
     # same string that is returned as "corrected_text".
     doc = nlp(text)

     return {
         "corrected_text": text,
         "differences": find_differences(original_text, text),
         "entities": [{"text": ent.text, "label": ent.label_} for ent in doc.ents],
     }
 def preprocess_and_forward(text: str):
     """Preprocess *text* and forward the result to the translation service.

     Args:
         text: Raw user input.

     Returns:
         A ``(translation, preprocessing_result)`` tuple, where
         ``preprocessing_result`` is the dict returned by
         ``preprocess_text``. If connecting to or calling the
         ``Frenchizer/space_17`` client raises, the first element is the
         string ``"Error: <message>"`` instead of a translation.
     """
     preprocessing_result = preprocess_text(text)

     # "corrected_text" may be empty when no correction stage changed the
     # input; fall back to the caller's text so the service never receives "".
     text_to_translate = preprocessing_result["corrected_text"] or text

     try:
         # The client is created inside the try block so that a connection
         # failure is reported through the same "Error: ..." return path as a
         # failed prediction.
         client = Client("Frenchizer/space_17")
         translation = client.predict(text_to_translate)
     except Exception as e:
         # Deliberately broad: this is the boundary to a remote service and
         # callers expect an error string rather than an exception.
         return f"Error: {str(e)}", preprocessing_result
     return translation, preprocessing_result