| import math
|
| import re
|
| import logging
|
|
|
class ArabicBubbleFormatter:
    """Break a text into lines whose lengths follow a balanced "bubble" shape.

    The formatter tries every line count from one up to six, distributes the
    words for each count according to ``target_ratios``, scores the resulting
    layout and keeps the highest-scoring one. All lengths are measured in
    characters as reported by ``len`` (code points), with one space between
    words.
    """

    def __init__(self):
        """Set up target ratios, line-length limits and score thresholds."""
        # Share of the total characters each line should receive, keyed by
        # the number of lines. Middle lines get the larger shares.
        self.target_ratios = {
            2: [0.50, 0.50],
            3: [0.30, 0.40, 0.30],
            4: [0.20, 0.30, 0.30, 0.20],
            5: [0.15, 0.25, 0.30, 0.25, 0.15],
            6: [0.12, 0.18, 0.25, 0.25, 0.18, 0.12],
        }

        # A line made of one word shorter than this is rejected outright.
        self.min_single_word_length = 6
        # Hard and soft limits on the characters in one line.
        self.max_line_chars = 30
        self.optimal_line_chars = 25
        # Upper bound on the words considered for a single line.
        self.max_words_per_line = 8

        # Starting score and the thresholds used for the quality label.
        self.perfect_score = 100
        self.good_score = 85
        self.acceptable_score = 70
        self.poor_score = 50

    def _preprocess_text(self, text: str) -> str:
        """Collapse every whitespace run to one space and trim both ends."""
        return re.sub(r'\s+', ' ', text).strip()

    def _get_line_character_length(self, words: list) -> int:
        """Return the length of ``words`` joined by single spaces (0 if empty)."""
        if not words:
            return 0
        return sum(len(word) for word in words) + len(words) - 1

    def _calculate_total_characters(self, words: list) -> int:
        """Return the character count of the whole text, spaces included.

        Delegates to ``_get_line_character_length`` so an empty word list
        yields 0 rather than -1.
        """
        return self._get_line_character_length(words)

    def _evaluate_line_distribution(self, lines: list) -> tuple:
        """Score a candidate layout and describe how the score was reached.

        Args:
            lines: The candidate lines, each a string of space-separated words.

        Returns:
            A ``(score, details)`` tuple. ``score`` starts at
            ``perfect_score`` and is adjusted by penalties and bonuses, so it
            may exceed ``perfect_score``. The final value is clamped at 0,
            except on the early-return paths. ``details`` is a quality label
            followed by the individual findings.
        """
        if not lines:
            return 0, "لا توجد أسطر"

        num_lines = len(lines)
        line_lengths = [len(line) for line in lines]
        total_chars = sum(line_lengths)

        score = self.perfect_score
        details = []

        # Hard failure: a line consisting of one short word ends the
        # evaluation immediately.
        for i, line in enumerate(lines):
            words = line.split()
            if len(words) == 1 and len(words[0]) < self.min_single_word_length:
                score -= 80
                details.append(f"❌ السطر {i+1}: كلمة واحدة قصيرة ({len(words[0])} أحرف)")
                return score, f"فشل: {'; '.join(details)}"

        # Per-line length penalties.
        for i, length in enumerate(line_lengths):
            if length > self.max_line_chars:
                score -= 20
                details.append(f"⚠️ السطر {i+1}: طويل جداً ({length} حرف)")
            elif length > self.optimal_line_chars:
                score -= 10
                details.append(f"⚠️ السطر {i+1}: أطول من المثالي ({length} حرف)")

        # Shape scoring, specific to the number of lines. Layouts with more
        # than six lines receive no shape adjustment.
        if num_lines == 1:
            if total_chars <= 30:
                details.append("✅ سطر واحد مثالي للنص القصير")
                return score, f"ممتاز: {'; '.join(details)}"
            score -= 30
            details.append("⚠️ النص طويل للسطر الواحد")

        elif num_lines == 2:
            # Two empty lines count as an even split instead of dividing by 0.
            ratio1 = line_lengths[0] / total_chars if total_chars else 0.5
            # Share of the longer line: makes the tiers symmetric, so a
            # 40-60 split is scored exactly like a 60-40 split.
            dominant = max(ratio1, 1 - ratio1)

            if abs(dominant - 0.5) <= 0.01:
                score += 10
                details.append("✅ توزيع مثالي 50-50")
            elif abs(dominant - 0.5) <= 0.05:
                details.append("✅ توزيع جيد متقارب")
            elif abs(dominant - 0.55) <= 0.01:
                score -= 5
                details.append("✅ توزيع جيد 55-45")
            elif abs(dominant - 0.6) <= 0.05:
                score -= 15
                details.append("⚠️ توزيع متوسط 60-40")
            else:
                score -= 25
                details.append("❌ توزيع غير متوازن")

        elif num_lines == 3:
            first, middle, last = line_lengths

            if middle >= first and middle >= last:
                score += 15
                details.append("✅ السطر الأوسط هو الأكبر")

                # Outer lines are only compared once the middle line leads.
                if abs(first - last) <= 2:
                    score += 15
                    details.append("✅ الأطراف متساوية")
                elif abs(first - last) <= 5:
                    score += 10
                    details.append("✅ الأطراف متقاربة")
                else:
                    score -= 10
                    details.append("⚠️ الأطراف مختلفة")
            else:
                score -= 20
                details.append("❌ السطر الأوسط ليس الأكبر")

        elif num_lines == 4:
            first, second, third, fourth = line_lengths
            longest = max(line_lengths)

            if (second >= first and third >= fourth
                    and second >= 0.8 * longest and third >= 0.8 * longest):
                score += 15
                details.append("✅ السطران الأوسطان أكبر")

                if abs(first - fourth) <= 2:
                    score += 10
                    details.append("✅ الأطراف متساوية")

                if abs(second - third) <= 2:
                    score += 10
                    details.append("✅ الأوسطان متساويان")
            else:
                score -= 15
                details.append("❌ توزيع غير مثالي")

        elif num_lines == 5:
            first, second, third, fourth, fifth = line_lengths

            if third >= max(line_lengths) * 0.9:
                score += 10
                details.append("✅ السطر الأوسط هو الأكبر")

                if abs(first - fifth) <= 2:
                    score += 10
                    details.append("✅ الأطراف متساوية")

                if abs(second - fourth) <= 2 and second > first and fourth > fifth:
                    score += 15
                    details.append("✅ الثاني والرابع متساويان وأكبر من الأطراف")
            else:
                score -= 15
                details.append("❌ توزيع غير مثالي")

        elif num_lines == 6:
            first, second, third, fourth, fifth, sixth = line_lengths

            if abs(third - fourth) <= 2 and third >= max(line_lengths) * 0.9:
                score += 10
                details.append("✅ الأوسطان متساويان وأكبر")

                if abs(first - sixth) <= 2:
                    score += 10
                    details.append("✅ الأطراف متساوية")

                if abs(second - fifth) <= 2 and second > first and fifth > sixth:
                    score += 15
                    details.append("✅ الثاني والخامس متساويان وأكبر من الأطراف")
            else:
                score -= 15
                details.append("❌ توزيع غير مثالي")

        # Translate the numeric score into a quality label.
        if score >= 95:
            quality = "ممتاز"
        elif score >= self.good_score:
            quality = "جيد جداً"
        elif score >= self.acceptable_score:
            quality = "جيد"
        elif score >= self.poor_score:
            quality = "متوسط"
        else:
            quality = "ضعيف"

        return max(0, score), f"{quality}: {'; '.join(details)}"

    def _distribute_words_optimally(self, words: list, num_lines: int) -> list:
        """Greedily split ``words`` into lines close to the target ratios.

        For each line, every admissible word count is tried and the one whose
        character length is closest to that line's target (after heuristic
        bonuses and penalties) is committed. The method keeps no state on the
        instance, so repeated calls are independent of each other.

        Args:
            words: The words of the text, in order.
            num_lines: The desired number of lines.

        Returns:
            The lines as strings. The count may differ from ``num_lines``:
            fewer when the words run out, one more when leftover words do not
            fit on the last line.
        """
        if num_lines == 1:
            return [" ".join(words)]

        if num_lines not in self.target_ratios:
            return self._simple_distribution(words, num_lines)

        ratios = self.target_ratios[num_lines]
        total_chars = self._calculate_total_characters(words)
        n = len(words)
        target_lengths = [int(total_chars * ratio) for ratio in ratios]

        result_lines = []
        current_index = 0

        for i, target_len in enumerate(target_lengths):
            if current_index >= n:
                break

            # Fallback when no candidate is admissible: take a single word.
            best_end = current_index + 1
            best_diff = float('inf')

            max_words_to_try = min(self.max_words_per_line, n - current_index)

            for end in range(current_index + 1, current_index + max_words_to_try + 1):
                line_words = words[current_index:end]
                line_length = self._get_line_character_length(line_words)

                # Never pick a line that is a lone short word.
                if len(line_words) == 1 and len(line_words[0]) < self.min_single_word_length:
                    continue

                if line_length > self.max_line_chars:
                    continue

                diff = abs(line_length - target_len)

                if num_lines == 4:
                    if i in (1, 2):
                        # Middle lines are encouraged to reach their target.
                        if line_length >= target_len:
                            diff -= 20
                        # The third line is encouraged to match the second
                        # line that was actually committed; at i == 2 exactly
                        # two lines are already in result_lines.
                        if i == 2 and abs(line_length - len(result_lines[1])) <= 2:
                            diff -= 15
                    elif i in (0, 3):
                        # Outer lines are encouraged to stay within target.
                        if line_length <= target_len:
                            diff -= 15

                if line_length <= self.optimal_line_chars:
                    diff -= 10

                # The last line is heavily penalised for leaving words over.
                if i == num_lines - 1:
                    remaining_words = n - end
                    if remaining_words > 0:
                        diff += remaining_words * 50

                if diff < best_diff:
                    best_diff = diff
                    best_end = end

            result_lines.append(" ".join(words[current_index:best_end]))
            current_index = best_end

        # Place any words the planned lines could not take.
        if current_index < n:
            remaining_words = words[current_index:]
            if result_lines and len(remaining_words) <= 2:
                # One or two leftovers join the last line when they fit.
                merged = " ".join(result_lines[-1].split() + remaining_words)
                if len(merged) <= self.max_line_chars:
                    result_lines[-1] = merged
                else:
                    result_lines.append(" ".join(remaining_words))
            else:
                result_lines.append(" ".join(remaining_words))

        return result_lines

    def _simple_distribution(self, words: list, num_lines: int) -> list:
        """Fallback split: spread the words as evenly as possible by count.

        The first ``len(words) % num_lines`` lines receive one extra word.
        Lines that would be empty are omitted; a non-positive ``num_lines``
        yields an empty list.
        """
        if num_lines <= 0:
            return []

        words_per_line, remainder = divmod(len(words), num_lines)

        result = []
        index = 0
        for i in range(num_lines):
            line_len = words_per_line + (1 if i < remainder else 0)
            line_words = words[index:index + line_len]
            index += line_len
            if line_words:
                result.append(" ".join(line_words))

        return result

    def _find_optimal_formatting(self, words: list) -> tuple:
        """Try every line count from 1 to 6 and keep the best-scoring layout.

        Args:
            words: The words of the preprocessed text.

        Returns:
            A ``(lines, score, details)`` tuple. ``lines`` is ``None`` when no
            candidate scored above 0, and an empty list when ``words`` is
            empty.
        """
        if not words:
            return [], 0, "نص فارغ"

        n = len(words)
        total_chars = self._calculate_total_characters(words)

        # A single word cannot be split.
        if n == 1:
            return [words[0]], self.perfect_score, "كلمة واحدة"

        # Short texts stay on one line.
        if total_chars <= 25:
            return [" ".join(words)], self.perfect_score, "نص قصير - سطر واحد"

        best_result = None
        best_score = 0
        best_details = ""

        for num_lines in range(1, min(6, n) + 1):
            lines = self._distribute_words_optimally(words, num_lines)
            score, details = self._evaluate_line_distribution(lines)

            # Bonuses depend on the requested line count, not on how many
            # lines the distribution actually produced.
            if total_chars <= 50 and num_lines <= 2:
                score += 10
            elif total_chars <= 80 and num_lines <= 3:
                score += 5

            if 50 <= total_chars <= 120 and num_lines == 3:
                score += 15

            if score > best_score:
                best_score = score
                best_result = lines
                best_details = details

        return best_result, best_score, best_details

    def _build_result(self, formatted, score, details, lines=None,
                      lines_count=0, original_length=0) -> dict:
        """Assemble the result dict so every outcome exposes the same keys."""
        return {
            "formatted": formatted,
            "score": score,
            "details": details,
            "lines_count": lines_count,
            "original_length": original_length,
            "lines_info": [
                {
                    "text": line,
                    "length": len(line),
                    "words": len(line.split()),
                }
                for line in (lines or [])
            ],
        }

    def format_text(self, text: str) -> dict:
        """Format ``text`` into balanced lines and report the outcome.

        Args:
            text: The raw text to format.

        Returns:
            A dict with the keys ``formatted`` (lines joined by newlines),
            ``score``, ``details``, ``lines_count``, ``original_length`` and
            ``lines_info`` (one ``text``/``length``/``words`` entry per line).
            Blank input and failures return the input unchanged with score 0
            and the same set of keys. Failures are logged instead of raised.
        """
        try:
            if not text.strip():
                return self._build_result(text, 0, "نص فارغ")

            processed_text = self._preprocess_text(text)
            words = processed_text.split()

            best_lines, score, details = self._find_optimal_formatting(words)

            # Fall back to the cleaned text when no layout was selected.
            if best_lines:
                formatted_text = "\n".join(best_lines)
            else:
                formatted_text = processed_text

            return self._build_result(
                formatted_text,
                score,
                details,
                lines=best_lines,
                lines_count=len(best_lines) if best_lines else 1,
                original_length=len(processed_text),
            )
        except Exception as e:
            # Best-effort API: report the failure in the result, not by raising.
            logging.exception("Bubble formatting error: %s", e)
            return self._build_result(
                text,
                0,
                f"Formatting failed: {str(e)}",
                original_length=len(text) if isinstance(text, str) else 0,
            )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |