Spaces:

Hawk3388
/

solver

Sleeping

App Files Files Community

Hawk3388 committed 16 days ago

Commit

03d6964

1 Parent(s): 3dfc234

modified: app.py

Browse files

modified: main.py
modified: model/gap_detection_model.pt

Files changed (3) hide show

app.py +58 -10
main.py +326 -56
model/gap_detection_model.pt +2 -2

app.py CHANGED Viewed

@@ -2,10 +2,12 @@ import os
 import tempfile
 import uuid
 import warnings
 import gradio as gr
 import requests
 from PIL import Image
 from main import WorksheetSolver
@@ -13,24 +15,70 @@ warnings.filterwarnings("ignore")
 ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg", "webp", "bmp"}
 GAP_DETECTION_MODEL_PATH = "./model/gap_detection_model.pt"
-GAP_MODEL_URL = "https://github.com/Hawk3388/solver/releases/download/v1.1.0/gap_detection_model.pt"
 def ensure_gap_model() -> str:
-	os.makedirs("./model", exist_ok=True)
-	if os.path.exists(GAP_DETECTION_MODEL_PATH):
-		return GAP_DETECTION_MODEL_PATH
-	with requests.get(GAP_MODEL_URL, stream=True, timeout=60) as response:
-		response.raise_for_status()
-		with open(GAP_DETECTION_MODEL_PATH, "wb") as model_file:
-			for chunk in response.iter_content(chunk_size=8192):
-				if chunk:
-					model_file.write(chunk)
 	return GAP_DETECTION_MODEL_PATH
 def _is_allowed_image(filename: str) -> bool:
 	return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS

 import tempfile
 import uuid
 import warnings
+import re
 import gradio as gr
 import requests
 from PIL import Image
+from pathlib import Path
 from main import WorksheetSolver
 ALLOWED_EXTENSIONS = {"png", "jpg", "jpeg", "webp", "bmp"}
 GAP_DETECTION_MODEL_PATH = "./model/gap_detection_model.pt"
+RELEASES_URL = "https://github.com/Hawk3388/solver/releases"
 def ensure_gap_model() -> str:
+	download = False
+	os.makedirs("./model", exist_ok=True)
+	folder_path = Path("./model")
+	model_folder_names = [p.name for p in folder_path.iterdir() if p.is_dir()]
+	if model_folder_names:
+		latest_version = sorted(model_folder_names, key=lambda s: list(map(int, s.lstrip("v").split("."))), reverse=True)[0]
+		model_path = folder_path / latest_version / "gap_detection_model.pt"
+		if not model_path.exists():
+			download = True
+	else:
+		download = True
+	release_response = requests.get(RELEASES_URL)
+	if release_response.status_code == 200:
+		pattern = re.compile(r"<h2[^>]*>(v\d+\.\d+\.\d+)</h2>")
+		versions = pattern.findall(release_response.text)
+		if not versions:
+			raise Exception("Could not determine the latest model version from GitHub releases.")
+	else:
+		raise Exception(f"Failed to fetch releases from GitHub: {release_response.status_code}")
+	for version in versions:
+		GAP_MODEL_URL = f"https://github.com/Hawk3388/solver/releases/download/{version}/gap_detection_model.pt"
+		if not url_exists(GAP_MODEL_URL):
+			continue
+		if download:
+			with requests.get(GAP_MODEL_URL, stream=True, timeout=60) as response:
+				with open(GAP_DETECTION_MODEL_PATH, "wb") as model_file:
+					for chunk in response.iter_content(chunk_size=8192):
+						if chunk:
+							model_file.write(chunk)
+			GAP_DETECTION_MODEL_PATH = str(folder_path / version / "gap_detection_model.pt")
+			break
+		else:
+			compare_versions = sorted([latest_version, version], key=lambda s: list(map(int, s.lstrip("v").split("."))), reverse=True)
+			newer_version = compare_versions[0]
+			if newer_version != latest_version:
+				with requests.get(GAP_MODEL_URL, stream=True, timeout=60) as response:
+					with open(GAP_DETECTION_MODEL_PATH, "wb") as model_file:
+						for chunk in response.iter_content(chunk_size=8192):
+							if chunk:
+								model_file.write(chunk)
+				GAP_DETECTION_MODEL_PATH = str(folder_path / version / "gap_detection_model.pt")
+				break
+			else:
+				GAP_DETECTION_MODEL_PATH = str(model_path)
 	return GAP_DETECTION_MODEL_PATH
def url_exists(url: str, timeout: float = 5.0) -> bool:
    """Report whether a HEAD request to *url* succeeds.

    Redirects are followed. Any final status in the 2xx/3xx range counts as
    "exists"; request failures (connection error, timeout, ...) count as
    "does not exist" instead of propagating.

    Args:
        url: Address to probe.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        True if the final response status is in [200, 400), else False.
    """
    try:
        response = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.RequestException:
        return False
    return 200 <= response.status_code < 400
 def _is_allowed_image(filename: str) -> bool:
 	return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS

main.py CHANGED Viewed

@@ -143,6 +143,11 @@ class WorksheetSolver():
         self.image = None
         self.detected_gaps = []
     def load_image(self, image_path: str):
         """Load image and create a copy for processing"""
@@ -231,11 +236,11 @@ class WorksheetSolver():
         current_line = [boxes_sorted[0]]
         # y-center and height of the current line
         line_y_min = boxes_sorted[0][1]
-        line_y_max = boxes_sorted[0][3] if len(boxes_sorted[0]) == 4 else boxes_sorted[0][1] + boxes_sorted[0][3]
         for box in boxes_sorted[1:]:
             box_y_top = box[1]
-            box_y_bottom = box[3] if len(box) == 4 else box[1] + box[3]
             box_height = box_y_bottom - box_y_top
             line_height = line_y_max - line_y_min
@@ -266,8 +271,172 @@ class WorksheetSolver():
         return result
     def detect_gaps(self):
         self.detected_gaps = []
         results = self.model.predict(source=self.path, conf=0.10)
@@ -286,51 +455,100 @@ class WorksheetSolver():
             else:
                 for idx in keep_indices:
                     box = r.boxes[idx]
                     x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
-                    self.detected_gaps.append((int(x1), int(y1), int(x2), int(y2)))
                 img = r.orig_img.copy()
         # Sort in reading order (line by line)
         self.detected_gaps = self.sort_reading_order(self.detected_gaps)
         return self.detected_gaps, img
     def mark_gaps(self, image, gaps):
-        """Mark detected gaps in the image with numbers"""
-        for i, gap in enumerate(gaps):
-            x1, y1, x2, y2 = gap
-            # Draw red box
             cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 2)
-            # Number at top left of the box
-            label = str(i + 1)
             label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
-            # Background for better readability
             cv2.rectangle(image, (x1, y1 - label_size[1] - 4), (x1 + label_size[0] + 2, y1), (0, 0, 255), -1)
-            cv2.putText(image, label, (x1 + 1, y1 - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
         return image
     def ask_ai_about_all_gaps(self, marked_image):
-        """Ask Gemini about the content of ALL gaps at once - just like test3"""
         if self.debug:
             start_time = self.time.time()
-        # Save the marked image (with boxes) just as test3 expects
         thinking = None
         marked_image_path = f"{Path(self.path).stem}_marked.png"
         cv2.imwrite(marked_image_path, marked_image)
-        prompt = f"""Look at the two images: one with red numbered boxes marking {len(self.detected_gaps)} gaps, one without markings.
-For each red box, read its number label and fill in the missing word(s) from the worksheet.
 Rules:
 - Answer in the worksheet's language.
-- Only the missing word(s), nothing else.
-- Match each answer to the correct box number.
-- If a box doesn't need filling, because it is already filled or is not a gap, answer with "none".
 - Do NOT overthink. These are simple language exercises. Answer quickly and directly. Only reason for about 10 sentences.
 - Look at the sheets carefully and use them as context for your answers.
-- Only answer in this exact JSON format: {{"solutions": [{{"key": box_number, "value": answer}}]}}"""
         if not self.experimental:
             if not self.local:
@@ -434,64 +652,72 @@ Rules:
         return output
     def solve_all_gaps(self, marked_image):
-        """Solve all detected gaps with Ollama - structured!"""
         if not self.detected_gaps:
             print("No gaps found!")
             return {}
-        print(f"🤖 Analyzing all {len(self.detected_gaps)} gaps with Ollama...")
-        # Ask Ollama about all gaps at once
-        print("📤 Sending image to Ollama...")
         solutions_data = self.ask_ai_about_all_gaps(marked_image)
         if solutions_data:
-            print("📥 Structured Ollama response received!")
             # Convert structured response to our format
             solutions = {}
-            # solutions_data.solutions is now a list of Pair objects
             for pair in solutions_data.solutions:
                 try:
-                    gap_id = pair.key
                     answer = pair.value
-                    gap_index = gap_id - 1  # 0-based
-                    if 0 <= gap_index < len(self.detected_gaps):
-                        solutions[gap_index] = {
-                            'position': self.detected_gaps[gap_index],
                             'solution': answer
                         }
                 except (ValueError, KeyError) as e:
-                    print(f"Error processing gap {gap_id}: {e}")
                     continue
             return solutions
         else:
-            print("❌ No response received from Ollama.")
             return {}
     def fill_gaps_in_image(self, image_path: str, solutions: dict, output_path: str = "worksheet_solved.png"):
-        """Fill the solutions into the image"""
         # Load OpenCV image and convert to PIL (for Unicode/umlauts)
         cv_image = self.load_image(image_path)
         pil_image = Image.fromarray(cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
         draw = ImageDraw.Draw(pil_image)
-        for gap_index, solution_data in solutions.items():
-            # Position is (x1, y1, x2, y2)
-            x1, y1, x2, y2 = solution_data['position']
-            w = x2 - x1
-            h = y2 - y1
             solution = solution_data['solution']
             if not solution or solution.lower() == 'none':
                 continue
-            # Find dynamic font size
-            font_size = 40  # Start large
             min_font_size = 8
             font = None
@@ -505,27 +731,61 @@ Rules:
                         font = ImageFont.load_default()
                         break
                 bbox = draw.textbbox((0, 0), solution, font=font)
                 text_width = bbox[2] - bbox[0]
                 text_height = bbox[3] - bbox[1]
                 padding = 4
-                if text_width <= w - padding and text_height <= h - padding:
-                    break
                 font_size -= 1
-            # Measure text size with final font
-            bbox = draw.textbbox((0, 0), solution, font=font)
-            text_width = bbox[2] - bbox[0]
-            text_height = bbox[3] - bbox[1]
-            # Position text centered in the box
-            text_x = x1 + (w - text_width) // 2
-            text_y = y1 + (h - text_height) // 2
-            # Draw text in black
-            draw.text((text_x, text_y), solution, fill=(0, 0, 0), font=font)
         # Convert back to OpenCV and save
         result_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
@@ -551,13 +811,21 @@ def main():
     try:
         gaps, img = solver.detect_gaps()
-        print(f"✅ {len(gaps)} gaps found!")
         marked_image = solver.mark_gaps(img, gaps)
         print("\n📍 Detected gaps (x, y, width, height):")
         for i, gap in enumerate(gaps):
-            print(f"  Gap {i+1}: {gap}")
         if solver.debug:
             # Ask user if AI analysis is desired
@@ -572,8 +840,10 @@ def main():
             if solutions:
                 print("\n✨ Solutions found:")
-                for i, sol in solutions.items():
-                    print(f"  Gap {i+1}: '{sol['solution']}'")
                 solver.fill_gaps_in_image(path, solutions)

         self.image = None
         self.detected_gaps = []
+        self.gap_groups = []  # Groups of gap indices
+        self.gap_to_group = {}  # Maps gap index to group index
+        self.ungrouped_gap_indices = []
+        self.answer_units = []  # Line groups + single ungrouped boxes
+        self.gap_to_answer_unit = {}  # Maps any gap index to answer unit index
     def load_image(self, image_path: str):
         """Load image and create a copy for processing"""
         current_line = [boxes_sorted[0]]
         # y-center and height of the current line
         line_y_min = boxes_sorted[0][1]
+        line_y_max = boxes_sorted[0][3]
         for box in boxes_sorted[1:]:
             box_y_top = box[1]
+            box_y_bottom = box[3]
             box_height = box_y_bottom - box_y_top
             line_height = line_y_max - line_y_min
         return result
+    def is_line_class(self, class_name):
+        """True only for the exact YOLO class name 'line'."""
+        return str(class_name).strip().lower() == "line"
+    def _unit_bbox(self, unit, gaps):
+        """Return merged bbox (x1, y1, x2, y2) for an answer unit."""
+        boxes = [gaps[i][:4] for i in unit if 0 <= i < len(gaps)]
+        if not boxes:
+            return (0, 0, 0, 0)
+        return (
+            min(b[0] for b in boxes),
+            min(b[1] for b in boxes),
+            max(b[2] for b in boxes),
+            max(b[3] for b in boxes),
+        )
+    def sort_answer_units_reading_order(self, units, gaps):
+        """Sort answer units globally by reading order: top->bottom, left->right."""
+        if not units:
+            return []
+        unit_data = []
+        for idx, unit in enumerate(units):
+            x1, y1, x2, y2 = self._unit_bbox(unit, gaps)
+            unit_data.append({
+                "idx": idx,
+                "unit": unit,
+                "x1": x1,
+                "y1": y1,
+                "x2": x2,
+                "y2": y2,
+                "h": max(1, y2 - y1),
+            })
+        unit_data.sort(key=lambda u: u["y1"])
+        rows = []
+        current_row = [unit_data[0]]
+        row_y_min = unit_data[0]["y1"]
+        row_y_max = unit_data[0]["y2"]
+        for u in unit_data[1:]:
+            overlap = min(row_y_max, u["y2"]) - max(row_y_min, u["y1"])
+            row_h = max(1, row_y_max - row_y_min)
+            min_h = max(1, min(row_h, u["h"]))
+            if overlap > 0 and (overlap / min_h) > 0.3:
+                current_row.append(u)
+                row_y_min = min(row_y_min, u["y1"])
+                row_y_max = max(row_y_max, u["y2"])
+            else:
+                rows.append(current_row)
+                current_row = [u]
+                row_y_min = u["y1"]
+                row_y_max = u["y2"]
+        rows.append(current_row)
+        sorted_units = []
+        for row in rows:
+            row.sort(key=lambda u: u["x1"])
+            sorted_units.extend([u["unit"] for u in row])
+        return sorted_units
+    def group_gaps_by_proximity(self, gaps):
+        """Group gaps that are directly below each other into groups.
+        Returns:
+            List of groups, where each group is a list of gap indices (0-based) sorted by Y position
+            Also returns a mapping from gap index to group index
+        """
+        if not gaps:
+            return [], {}
+        # Create index mapping: sorted_idx -> original_idx
+        indices = list(range(len(gaps)))
+        sorted_indices = sorted(indices, key=lambda i: gaps[i][1])  # Sort by Y (top to bottom)
+        # Calculate average gap height as threshold
+        heights = [(gap[3] - gap[1]) for gap in gaps]
+        avg_height = sum(heights) / len(heights) if heights else 0
+        # Distance threshold: gaps are "below each other" if distance < avg_height * 1.5
+        distance_threshold = avg_height * 1.5
+        groups = []
+        gap_to_group = {}
+        grouped = set()
+        # Process gaps from top to bottom
+        for sort_i, i in enumerate(sorted_indices):
+            if i in grouped:
+                continue
+            gap_i = gaps[i]
+            x1_i, y1_i, x2_i, y2_i = gap_i[:4]
+            class_name_i = gap_i[4] if len(gap_i) > 4 else "line"
+            # Only exact 'line' class is groupable. Other classes are ignored here.
+            if not self.is_line_class(class_name_i):
+                continue
+            # Start new group with current line gap
+            current_group = [i]
+            grouped.add(i)
+            # Look for gaps below this one
+            for sort_j in range(sort_i + 1, len(sorted_indices)):
+                j = sorted_indices[sort_j]
+                if j in grouped:
+                    continue
+                gap_j = gaps[j]
+                x1_j, y1_j, x2_j, y2_j = gap_j[:4]
+                class_name_j = gap_j[4] if len(gap_j) > 4 else "line"
+                # Only group if both are exact line class detections
+                if not self.is_line_class(class_name_j):
+                    continue
+                # Check vertical distance (gap j should be below gap i)
+                vertical_distance = y1_j - y2_i
+                # Check horizontal alignment
+                i_left, i_top, i_right, i_bottom = x1_i, y1_i, x2_i, y2_i
+                j_left, j_top, j_right, j_bottom = x1_j, y1_j, x2_j, y2_j
+                # Calculate horizontal overlap
+                h_overlap_start = max(i_left, j_left)
+                h_overlap_end = min(i_right, j_right)
+                h_overlap = max(0, h_overlap_end - h_overlap_start)
+                # Box widths
+                i_width = i_right - i_left
+                j_width = j_right - j_left
+                min_width = min(i_width, j_width)
+                # Check if box j is below box i and horizontally aligned
+                if 0 < vertical_distance < distance_threshold:
+                    # At least 30% overlap or 15px minimum
+                    if h_overlap > min_width * 0.3 or h_overlap > 15:
+                        current_group.append(j)
+                        grouped.add(j)
+                        gap_i = gap_j  # Update for next iteration
+                        x1_i, y1_i, x2_i, y2_i = gap_i[:4]
+                    else:
+                        # Not enough overlap, end this group
+                        break
+                else:
+                    # Distance too large, end this group
+                    break
+            # Store group (sort indices in return order)
+            current_group.sort()
+            for idx in current_group:
+                gap_to_group[idx] = len(groups)
+            groups.append(current_group)
+        return groups, gap_to_group
     def detect_gaps(self):
         self.detected_gaps = []
+        img = self.load_image(self.path)
         results = self.model.predict(source=self.path, conf=0.10)
             else:
                 for idx in keep_indices:
                     box = r.boxes[idx]
+                    class_id = int(box.cls[0])
+                    class_name = r.names[class_id]
                     x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
+                    self.detected_gaps.append((int(x1), int(y1), int(x2), int(y2), class_name))
                 img = r.orig_img.copy()
         # Sort in reading order (line by line)
         self.detected_gaps = self.sort_reading_order(self.detected_gaps)
+        # Group gaps by proximity (vertically aligned and close together)
+        self.gap_groups, self.gap_to_group = self.group_gaps_by_proximity(self.detected_gaps)
+        self.ungrouped_gap_indices = [i for i in range(len(self.detected_gaps)) if i not in self.gap_to_group]
+        # Build answer units for the AI:
+        # - grouped line boxes stay grouped
+        # - each ungrouped box (e.g. class gap) becomes its own single unit
+        unsorted_units = list(self.gap_groups) + [[idx] for idx in self.ungrouped_gap_indices]
+        self.answer_units = self.sort_answer_units_reading_order(unsorted_units, self.detected_gaps)
+        self.gap_to_answer_unit = {}
+        for unit_idx, unit in enumerate(self.answer_units):
+            for gap_idx in unit:
+                self.gap_to_answer_unit[gap_idx] = unit_idx
+        print(f"📊 Line-boxes grouped into {len(self.gap_groups)} groups")
+        for i, group in enumerate(self.gap_groups):
+            print(f"   Group {i+1}: {len(group)} gaps (indices: {group})")
+        print(f"📌 Ungrouped boxes (e.g. gap): {len(self.ungrouped_gap_indices)}")
+        print(f"🧠 Total AI answer units: {len(self.answer_units)}")
         return self.detected_gaps, img
     def mark_gaps(self, image, gaps):
+        """Draw one red box per answer unit (group) instead of per single line."""
+        if not self.answer_units:
+            return image
+        for unit_idx, unit in enumerate(self.answer_units):
+            unit_boxes = [gaps[i][:4] for i in unit if 0 <= i < len(gaps)]
+            if not unit_boxes:
+                continue
+            # Surround the whole group with one box.
+            x1 = min(b[0] for b in unit_boxes)
+            y1 = min(b[1] for b in unit_boxes)
+            x2 = max(b[2] for b in unit_boxes)
+            y2 = max(b[3] for b in unit_boxes)
             cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 2)
+            label = str(unit_idx + 1)
             label_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
             cv2.rectangle(image, (x1, y1 - label_size[1] - 4), (x1 + label_size[0] + 2, y1), (0, 0, 255), -1)
+            cv2.putText(image, (label), (x1 + 1, y1 - 3), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)
         return image
     def ask_ai_about_all_gaps(self, marked_image):
+        """Ask Gemini about the content of ALL gap groups at once"""
         if self.debug:
             start_time = self.time.time()
         thinking = None
         marked_image_path = f"{Path(self.path).stem}_marked.png"
         cv2.imwrite(marked_image_path, marked_image)
+        # Build description of answer units
+        group_descriptions = []
+        for i, group in enumerate(self.answer_units):
+            group_num = i + 1
+            first_idx = group[0]
+            class_name = str(self.detected_gaps[first_idx][4]) if len(self.detected_gaps[first_idx]) > 4 else "gap"
+            if len(group) > 1:
+                group_descriptions.append(f"Group {group_num}: {len(group)} stacked line boxes (marked as {group_num})")
+            else:
+                group_descriptions.append(f"Group {group_num}: 1 single {class_name} box (marked as {group_num})")
+        group_text = "\n".join(group_descriptions)
+        prompt = f"""Look at the two images: one with red numbered boxes marking {len(self.answer_units)} answer groups, one without markings.
+Answer groups to fill:
+{group_text}
+For each group marked with its number label, provide ONE answer that should fill that group.
+The answer will be distributed across the stacked lines (first line(s) filled first, then overflow to next line).
 Rules:
 - Answer in the worksheet's language.
+- Provide text that makes sense when distributed line by line.
+- Match each answer to the correct group number.
+- If a group doesn't need filling, answer with "none".
 - Do NOT overthink. These are simple language exercises. Answer quickly and directly. Only reason for about 10 sentences.
 - Look at the sheets carefully and use them as context for your answers.
+- Only answer in this exact JSON format: {{"solutions": [{{"key": group_number, "value": answer}}]}}"""
         if not self.experimental:
             if not self.local:
         return output
     def solve_all_gaps(self, marked_image):
+        """Solve all gap groups with Ollama - structured!"""
         if not self.detected_gaps:
             print("No gaps found!")
             return {}
+        if not self.answer_units:
+            print("No answer units found to solve.")
+            return {}
+        print(f"🤖 Analyzing all {len(self.answer_units)} answer units with AI...")
+        # Ask AI about all gap groups at once
+        print("📤 Sending image to AI...")
         solutions_data = self.ask_ai_about_all_gaps(marked_image)
         if solutions_data:
+            print("📥 Structured AI response received!")
             # Convert structured response to our format
             solutions = {}
+            # solutions_data.solutions is now a list of GroupPair objects
             for pair in solutions_data.solutions:
                 try:
+                    group_id = pair.key
                     answer = pair.value
+                    group_index = group_id - 1  # 0-based
+                    if 0 <= group_index < len(self.answer_units):
+                        gap_indices = self.answer_units[group_index]
+                        solutions[group_index] = {
+                            'gap_indices': gap_indices,
                             'solution': answer
                         }
                 except (ValueError, KeyError) as e:
+                    print(f"Error processing group {group_id}: {e}")
                     continue
             return solutions
         else:
+            print("❌ No response received from AI.")
             return {}
     def fill_gaps_in_image(self, image_path: str, solutions: dict, output_path: str = "worksheet_solved.png"):
+        """Fill the solutions into grouped gaps with text flowing across multiple boxes"""
         # Load OpenCV image and convert to PIL (for Unicode/umlauts)
         cv_image = self.load_image(image_path)
         pil_image = Image.fromarray(cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB))
         draw = ImageDraw.Draw(pil_image)
+        for group_index, solution_data in solutions.items():
+            gap_indices = solution_data['gap_indices']
             solution = solution_data['solution']
             if not solution or solution.lower() == 'none':
                 continue
+            # Get all boxes for this group
+            boxes = [self.detected_gaps[idx] for idx in gap_indices]
+            # Calculate total available space
+            total_width = sum(box[2] - box[0] for box in boxes)
+            avg_height = boxes[0][3] - boxes[0][1]
+            # Find optimal font size for this solution
+            font_size = 40
             min_font_size = 8
             font = None
                         font = ImageFont.load_default()
                         break
+                # Test if text fits
                 bbox = draw.textbbox((0, 0), solution, font=font)
                 text_width = bbox[2] - bbox[0]
                 text_height = bbox[3] - bbox[1]
+                # Check if it fits in available space (with padding)
                 padding = 4
+                if text_height <= avg_height - padding:
+                    # For width, use total available width or at least one box width
+                    if text_width <= total_width - padding or text_width <= (boxes[0][2] - boxes[0][0]) - padding:
+                        break
                 font_size -= 1
+            # Distribute text across boxes in the group
+            words = solution.split()
+            current_box_idx = 0
+            x_offset = boxes[current_box_idx][0]  # Start position in current box
+            for word in words:
+                if current_box_idx >= len(boxes):
+                    break
+                # Get current box dimensions
+                x1, y1, x2, y2 = boxes[current_box_idx][:4]
+                box_width = x2 - x1
+                box_height = y2 - y1
+                # Measure word with space
+                word_with_space = word + " "
+                bbox = draw.textbbox((0, 0), word_with_space, font=font)
+                word_width = bbox[2] - bbox[0]
+                text_height = bbox[3] - bbox[1]
+                # Check if word fits in current box
+                available_width = (x2 - x_offset) - 4  # Subtract padding
+                if word_width <= available_width:
+                    # Word fits in current box
+                    text_y = y1 + (box_height - text_height) // 2
+                    draw.text((x_offset, text_y), word_with_space, fill=(0, 0, 0), font=font)
+                    x_offset += word_width
+                else:
+                    # Word doesn't fit - move to next box
+                    current_box_idx += 1
+                    if current_box_idx < len(boxes):
+                        x1, y1, x2, y2 = boxes[current_box_idx][:4]
+                        x_offset = x1 + 2  # Small padding
+                        # Now place the word in the new box
+                        if word_width <= (x2 - x_offset) - 4:
+                            text_y = y1 + (box_height - text_height) // 2
+                            draw.text((x_offset, text_y), word_with_space, fill=(0, 0, 0), font=font)
+                            x_offset += word_width
         # Convert back to OpenCV and save
         result_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
     try:
         gaps, img = solver.detect_gaps()
+        print(f"✅ {len(gaps)} boxes found, {len(solver.gap_groups)} line groups, {len(solver.ungrouped_gap_indices)} ungrouped!")
         marked_image = solver.mark_gaps(img, gaps)
         print("\n📍 Detected gaps (x, y, width, height):")
         for i, gap in enumerate(gaps):
+            unit_num = solver.gap_to_answer_unit.get(i)
+            if unit_num is not None:
+                print(f"  Box {i+1} (Group {unit_num + 1}): {gap}")
+            else:
+                print(f"  Box {i+1} (ungrouped): {gap}")
+        print("\n📊 Gap groups:")
+        for g_idx, group in enumerate(solver.gap_groups):
+            print(f"  Group {g_idx+1}: gaps {[idx+1 for idx in group]}")
         if solver.debug:
             # Ask user if AI analysis is desired
             if solutions:
                 print("\n✨ Solutions found:")
+                for group_idx, sol in solutions.items():
+                    group_num = group_idx + 1
+                    gap_indices = [idx+1 for idx in sol['gap_indices']]
+                    print(f"  Group {group_num} (gaps {gap_indices}): '{sol['solution']}'")
                 solver.fill_gaps_in_image(path, solutions)

model/gap_detection_model.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a09d72ab83480428164c040356af5dce6b59fd42d305621901d9d234f0657c09
-size 53210085

 version https://git-lfs.github.com/spec/v1
+oid sha256:2593fee314b21afead4fc047f7c545b7e117ef37fba80bac452880e89ab1fb18
+size 53167589