Spaces:

SuperSl6
/

Arabic-Text-Correction

Sleeping

App Files Files Community

Arabic-Text-Correction / app.py

SuperSl6

Update app.py

228c780 verified 11 months ago

raw

history blame contribute delete

3.36 kB

	from transformers import pipeline, AutoTokenizer
	import gradio as gr
	import difflib

	# Tokenizer and generation pipeline are both loaded from the same Hub
	# checkpoint. The non-fast tokenizer implementation is requested explicitly.
	tokenizer = AutoTokenizer.from_pretrained(
	    "SuperSl6/Arabic-Text-Correction",
	    use_fast=False,
	)

	# Text-to-text generation pipeline; correct_text() calls this object directly.
	model = pipeline(
	    task="text2text-generation",
	    model="SuperSl6/Arabic-Text-Correction",
	    tokenizer=tokenizer,
	)

	def align_and_preserve(original, corrected):
	    """Merge the words of ``original`` and ``corrected`` using a word-level diff.

	    Both strings are split on whitespace and compared with
	    ``difflib.SequenceMatcher``. The diff opcodes are walked in order and
	    words are collected as follows:

	    * ``equal``   -> the words from ``corrected``
	    * ``delete``  -> the words from ``original`` (they are kept, not dropped)
	    * ``replace`` -> the words from ``corrected`` followed by the words
	      from ``original``
	    * ``insert``  -> nothing is collected

	    A word is emitted only the first time it is encountered; later
	    occurrences of the same word are skipped.

	    Args:
	        original: The text as entered by the user.
	        corrected: The candidate corrected text.

	    Returns:
	        The collected words joined by single spaces. Returns ``''`` when
	        nothing is collected (for example when both inputs are empty).
	    """
	    original_words = original.split()
	    corrected_words = corrected.split()

	    matcher = difflib.SequenceMatcher(None, original_words, corrected_words)
	    final_output = []
	    seen_words = set()

	    def keep_unseen(words):
	        # Append each word once, in first-seen order.
	        for word in words:
	            if word not in seen_words:
	                seen_words.add(word)
	                final_output.append(word)

	    for opcode, a0, a1, b0, b1 in matcher.get_opcodes():
	        if opcode == 'equal':
	            keep_unseen(corrected_words[b0:b1])
	        elif opcode == 'delete':
	            keep_unseen(original_words[a0:a1])
	        elif opcode == 'replace':
	            keep_unseen(corrected_words[b0:b1])
	            keep_unseen(original_words[a0:a1])
	        # 'insert' opcodes contribute nothing, as before.

	    # NOTE: an earlier version looped over corrected_words[b1:] here. The
	    # opcodes always span both sequences to their ends, so that slice was
	    # always empty, and with no opcodes at all (both inputs empty) ``b1``
	    # was unbound and the function raised. The loop was removed.
	    return ' '.join(final_output)

	def extract_corrected_version(original, generated):
	    """Pick the generated segment closest to ``original`` and align it.

	    ``generated`` is split on the literal separator ``' . '``. The segment
	    with the highest character-level ``SequenceMatcher`` ratio against
	    ``original`` is chosen (the first one wins on ties), stripped of
	    surrounding whitespace, and passed to ``align_and_preserve``.

	    Args:
	        original: The text as entered by the user.
	        generated: Raw text produced by the model.

	    Returns:
	        The string returned by ``align_and_preserve`` for the chosen segment.
	    """
	    def similarity(candidate):
	        # Character-level similarity between the input and one segment.
	        return difflib.SequenceMatcher(None, original, candidate).ratio()

	    candidates = generated.split(' . ')
	    closest = max(candidates, key=similarity)
	    return align_and_preserve(original, closest.strip())

	def correct_text(input_text):
	    """Run the correction model on ``input_text`` and post-process its output.

	    The module-level ``model`` pipeline is called with a fixed set of
	    generation options. The ``'generated_text'`` of the first returned
	    item is then handed, together with the input, to
	    ``extract_corrected_version``.

	    Args:
	        input_text: Text to correct (wired to the Gradio input textbox).

	    Returns:
	        The post-processed corrected text.
	    """
	    # Sampling is switched on (do_sample=True), so repeated calls with the
	    # same input presumably may produce different generations.
	    generation_options = dict(
	        max_length=50,
	        no_repeat_ngram_size=2,
	        repetition_penalty=1.5,
	        num_return_sequences=1,
	        temperature=0.7,
	        top_p=0.9,
	        do_sample=True,
	    )
	    outputs = model(input_text, **generation_options)
	    generated = outputs[0]['generated_text']
	    return extract_corrected_version(input_text, generated)

	# --- Gradio user interface ---

	# Sample inputs offered in the UI; each inner list is one example row.
	examples = [
	    ["اكيد ان لحكام العرب والمسلمين مسؤولية يتمثل ادناها في استدعاء السفراء في الصين للتشاور"],
	    ["هزا النص يحتوي على الكثير من الاخطاء الاملائية"],
	    ["هليكم السلام ورحمة الله وبركاته"],
	    ["انشاء الله سيكون كل شيء بخير"],
	]

	# Input and output widgets, built up front to keep the Interface call short.
	input_box = gr.Textbox(
	    lines=4,
	    placeholder="✍️ أدخل النص العربي هنا لتصحيحه...",
	    label="📥 النص المدخل",
	)
	output_box = gr.Textbox(label="✅ النص المصحح")

	# Single-function app: text in -> correct_text -> text out, flagging disabled.
	interface = gr.Interface(
	    fn=correct_text,
	    inputs=input_box,
	    outputs=output_box,
	    title="🚀 تصحيح النص العربي باستخدام SuperSl6/Arabic-Text-Correction",
	    description="📝 أداة ذكية لتصحيح النصوص العربية باستخدام تقنيات الذكاء الاصطناعي. أدخل النص وسيتم تصحيحه في الوقت الفعلي!",
	    theme="compact",
	    examples=examples,
	    allow_flagging="never",
	)

	# Start the web server as soon as the script runs.
	interface.launch()