Spaces:

SuperSl6
/

Arabic-Text-Correction

Sleeping

SuperSl6 committed on Feb 3, 2025

Commit

102175e

verified ·

1 Parent(s): a827e42

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from transformers import pipeline, AutoTokenizer
 import gradio as gr
 # Load tokenizer with use_fast=False
 tokenizer = AutoTokenizer.from_pretrained("SuperSl6/Arabic-Text-Correction", use_fast=False)
@@ -12,12 +13,17 @@ model = pipeline(
 def correct_text(input_text):
     result = model(
         input_text,
-        max_length=50,               # Limit output length
-        no_repeat_ngram_size=2,      # Prevent repeating bigrams
-        repetition_penalty=1.5,      # Penalize repetitions
-        num_return_sequences=1       # Return a single output
     )[0]['generated_text']
-    return result
 # Gradio Interface
 interface = gr.Interface(

 from transformers import pipeline, AutoTokenizer
 import gradio as gr
+import re
 # Load tokenizer with use_fast=False
 tokenizer = AutoTokenizer.from_pretrained("SuperSl6/Arabic-Text-Correction", use_fast=False)
 def correct_text(input_text):
     result = model(
         input_text,
+        max_length=50,
+        no_repeat_ngram_size=2,
+        repetition_penalty=1.5,
+        num_return_sequences=1
     )[0]['generated_text']
+    # Extract the first occurrence of corrected Arabic word(s)
+    matches = re.findall(r'[\u0600-\u06FF]+', result)
+    corrected_text = matches[0] if matches else result
+    return corrected_text
 # Gradio Interface
 interface = gr.Interface(