Spaces:

bayan10
/

bayan-api

Running

App Files Files Community

bayan-api / tests /test_full.py

youssefreda9

ui: Clean up editor placeholder text and alignment (top-right)

dfe1d91 11 days ago

Raw

History Blame Contribute Delete

5.6 kB

	import difflib
	import re

	def get_word_positions(text):
	positions = []
	for m in re.finditer(r'\S+', text):
	positions.append((m.group(), m.start(), m.end()))
	return positions

	class OffsetMapper:
	def __init__(self, original, modified):
	self.original = original
	self.modified = modified
	self.mapping = []
	self._build_mapping()

	def _build_mapping(self):
	s = difflib.SequenceMatcher(None, self.original, self.modified)
	for tag, i1, i2, j1, j2 in s.get_opcodes():
	self.mapping.append((j1, j2, i1, i2))

	def map_offset(self, mod_offset):
	for j1, j2, i1, i2 in self.mapping:
	if j1 <= mod_offset <= j2:
	if j2 == j1:
	return i1
	ratio = (mod_offset - j1) / (j2 - j1)
	return int(i1 + ratio * (i2 - i1))
	return len(self.original)

	def get_word_diffs(original, corrected):
	orig_words = get_word_positions(original)
	corr_words = get_word_positions(corrected)
	s = difflib.SequenceMatcher(None, [w[0] for w in orig_words], [w[0] for w in corr_words])
	suggestions = []
	for tag, i1, i2, j1, j2 in s.get_opcodes():
	if tag == 'replace':
	if i1 < len(orig_words) and i2 - 1 < len(orig_words):
	start_char = orig_words[i1][1]
	end_char = orig_words[i2-1][2]
	suggestions.append({
	'start': start_char,
	'end': end_char,
	'original': original[start_char:end_char],
	'correction': " ".join([w[0] for w in corr_words[j1:j2]])
	})
	elif tag == 'delete':
	if i1 < len(orig_words) and i2 - 1 < len(orig_words):
	start_char = orig_words[i1][1]
	end_char = orig_words[i2-1][2]
	suggestions.append({
	'start': start_char,
	'end': end_char,
	'original': original[start_char:end_char],
	'correction': ''
	})
	elif tag == 'insert':
	pos = orig_words[i1][1] if i1 < len(orig_words) else len(original)
	suggestions.append({
	'start': pos,
	'end': pos,
	'original': '',
	'correction': " ".join([w[0] for w in corr_words[j1:j2]])
	})
	return suggestions

	def test():
	original = "قال محمد علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوباالصعوبات...."
	spelling_corrected = "قال محمد علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوباالصعوبات...."
	# Wait, in the log, spelling DROPPED [4:9] محمد:. This means original was "قال محمد: علي ..."
	original = "قال محمد: علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوباالصعوبات...."
	spelling_corrected = "قال محمد علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوباالصعوبات...."
	grammar_corrected = "قال محمد علي أننا حققنا نجاحا كبيرا في المشروع رغم الصعوبات..."
	punct_corrected = "قال محمد علي: أننا حققنا نجاحا كبيرا في المشروع رغم الصعوبات...."

	suggestions = []
	mappers = []

	# 1. Spelling
	# Assume spelling generated the diff exactly as SequenceMatcher would:
	diffs = get_word_diffs(original, spelling_corrected)
	for d in diffs:
	d['type'] = 'spelling'
	suggestions.append(d)
	mappers.append(OffsetMapper(original, spelling_corrected))

	# 2. Grammar
	def map_range_to_original(start, end):
	curr_start, curr_end = start, end
	for mapper in reversed(mappers):
	curr_start = mapper.map_offset(curr_start)
	curr_end = mapper.map_offset(curr_end)
	return curr_start, curr_end

	diffs = get_word_diffs(spelling_corrected, grammar_corrected)
	for d in diffs:
	o_s, o_e = map_range_to_original(d['start'], d['end'])
	suggestions.append({
	'start': o_s, 'end': o_e, 'type': 'grammar',
	'original': original[o_s:o_e], 'correction': d['correction']
	})
	mappers.append(OffsetMapper(spelling_corrected, grammar_corrected))

	# 3. Punctuation
	diffs = get_word_diffs(grammar_corrected, punct_corrected)
	for d in diffs:
	o_s, o_e = map_range_to_original(d['start'], d['end'])
	suggestions.append({
	'start': o_s, 'end': o_e, 'type': 'punctuation',
	'original': original[o_s:o_e], 'correction': d['correction']
	})

	print("ALL SUGGESTIONS:")
	for s in suggestions:
	print(s)

	PRIORITY = {'grammar': 3, 'punctuation': 2, 'spelling': 1, 'autocomplete': 0}
	suggestions.sort(key=lambda s: PRIORITY.get(s['type'], 0), reverse=True)
	claimed_ranges = []
	resolved = []
	for s in suggestions:
	overlaps = False
	for (c_start, c_end, c_type) in claimed_ranges:
	if s['start'] < c_end and s['end'] > c_start:
	overlaps = True
	print(f"[OVERLAP] Dropped {s['type']} [{s['start']}:{s['end']}] '{s['original']}' — conflicts with {c_type}")
	break
	if not overlaps:
	resolved.append(s)
	claimed_ranges.append((s['start'], s['end'], s['type']))

	if __name__ == "__main__":
	test()