Spaces:
Sleeping
Sleeping
| from transformers import pipeline, AutoTokenizer | |
| import gradio as gr | |
| import difflib | |
# Hub checkpoint used for both the tokenizer and the generation pipeline.
_MODEL_ID = "SuperSl6/Arabic-Text-Correction"

# Load tokenizer (use_fast=False requests the non-fast tokenizer implementation)
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, use_fast=False)

# Text-to-text generation pipeline built on the same checkpoint, reusing the
# tokenizer loaded above instead of letting the pipeline load its own.
model = pipeline("text2text-generation", model=_MODEL_ID, tokenizer=tokenizer)
def align_and_preserve(original, corrected):
    """Merge the words of *original* and *corrected* into one string.

    Both texts are split on whitespace and aligned word-by-word with
    ``difflib.SequenceMatcher``. The aligned spans are walked in order:

    * ``equal``   -- the shared words are emitted.
    * ``delete``  -- words found only in *original* are emitted, so words
      the corrected text left out are preserved.
    * ``replace`` -- the corrected words are emitted first, followed by the
      original words they replaced.
    * ``insert``  -- has no branch; words found only in *corrected* are
      not emitted.

    A word is emitted only the first time it is encountered, so the result
    never contains the same word twice, even if the input repeated it.

    Args:
        original: The text as entered by the user.
        corrected: The candidate correction to align against *original*.

    Returns:
        The emitted words joined by single spaces; ``""`` when nothing is
        emitted (for example when *original* contains no words).
    """
    original_words = original.split()
    corrected_words = corrected.split()
    matcher = difflib.SequenceMatcher(None, original_words, corrected_words)

    final_output = []
    seen_words = set()

    def _emit_unseen(words):
        # Append each word the first time it shows up; skip later repeats.
        for word in words:
            if word not in seen_words:
                seen_words.add(word)
                final_output.append(word)

    for opcode, a0, a1, b0, b1 in matcher.get_opcodes():
        if opcode == 'equal':
            _emit_unseen(corrected_words[b0:b1])
        elif opcode == 'delete':
            _emit_unseen(original_words[a0:a1])
        elif opcode == 'replace':
            _emit_unseen(corrected_words[b0:b1])
            _emit_unseen(original_words[a0:a1])

    # No trailing sweep over corrected_words is needed: the opcodes always
    # run to the end of both sequences. The former sweep read ``b1`` after
    # the loop, which is unbound when there are no opcodes at all (both
    # texts empty) and crashed with UnboundLocalError.
    return ' '.join(final_output)
def extract_corrected_version(original, generated):
    """Pick the part of *generated* closest to *original* and align it.

    The generated text is cut into candidates at every ``' . '`` separator.
    Each candidate is scored by its character-level
    ``difflib.SequenceMatcher`` ratio against *original*; the highest-scoring
    one (the earliest on a tie) is stripped of surrounding whitespace and
    passed, together with *original*, to ``align_and_preserve``.

    Args:
        original: The text as entered by the user.
        generated: The raw text produced by the model.

    Returns:
        The string returned by ``align_and_preserve`` for the chosen
        candidate.
    """
    def similarity(candidate):
        return difflib.SequenceMatcher(None, original, candidate).ratio()

    candidates = generated.split(' . ')
    closest = max(candidates, key=similarity)
    return align_and_preserve(original, closest.strip())
def correct_text(input_text):
    """Run the correction model on *input_text* and return the cleaned result.

    The text is sent through the module-level ``model`` pipeline with
    sampling enabled (``do_sample=True``), so the raw generation may differ
    between calls for the same input. The first returned sequence is then
    post-processed by ``extract_corrected_version``.

    Args:
        input_text: The text to correct. ``None``, empty, or
            whitespace-only input is treated as "nothing to correct".

    Returns:
        The corrected text, or ``""`` for blank input.
    """
    # Blank input contains no words, so the alignment step could never emit
    # anything for it; skip the (sampled, costly) generation entirely.
    if not input_text or not input_text.strip():
        return ""

    generations = model(
        input_text,
        max_length=50,
        no_repeat_ngram_size=2,
        repetition_penalty=1.5,
        num_return_sequences=1,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )
    # Only one sequence is requested, so the first entry is the generation.
    result = generations[0]['generated_text']
    return extract_corrected_version(input_text, result)
# Gradio Interface
# Sample inputs offered in the UI; each inner list holds the value for the
# single text input.
examples = [
    ["اكيد ان لحكام العرب والمسلمين مسؤولية يتمثل ادناها في استدعاء السفراء في الصين للتشاور"],
    ["هزا النص يحتوي على الكثير من الاخطاء الاملائية"],
    ["هليكم السلام ورحمة الله وبركاته"],
    ["انشاء الله سيكون كل شيء بخير"],
]

# Multi-line box the user types the text into.
input_box = gr.Textbox(
    lines=4,
    placeholder="✍️ أدخل النص العربي هنا لتصحيحه...",
    label="📥 النص المدخل",
)

# Box that displays whatever correct_text returns.
output_box = gr.Textbox(label="✅ النص المصحح")

interface = gr.Interface(
    fn=correct_text,
    inputs=input_box,
    outputs=output_box,
    title="🚀 تصحيح النص العربي باستخدام SuperSl6/Arabic-Text-Correction",
    description="📝 أداة ذكية لتصحيح النصوص العربية باستخدام تقنيات الذكاء الاصطناعي. أدخل النص وسيتم تصحيحه في الوقت الفعلي!",
    theme="compact",
    examples=examples,
    allow_flagging="never",
)

# Start the web app as soon as the module is executed.
interface.launch()