# Spaces: tokyotechlab / report
#
# Sleeping
#
# File size: 10,361 Bytes
#
# 02dc410

import base64
import re
import io
from PIL import Image, ImageDraw, ImageFont

class ReadmeProcessor:
    def __init__(self):
        """
        Initializes the ReadmeProcessor.
        """
        self.image_frames_base64 = []
        # Try to load a default font for annotation, fall back to a generic one if not found
        try:
            self.font = ImageFont.truetype("arial.ttf", 24) # Increased font size for 840x840 image
        except IOError:
            self.font = ImageFont.load_default()
        self.text_color = (255, 0, 0) # Red color for text (R, G, B)
        self.target_image_size = (240, 240) # New fixed size for all images

    def _parse_image_references(self, text: str) -> list[int]:
        """
        Parses a given text to find all image reference blocks like '[Image X, Y, Z]'
        or '[Image A]' and returns a flat, sorted list of unique 0-indexed image numbers.
        Duplicate image references within the same paragraph (across multiple blocks or within one)
        are handled. The list is sorted by the original image index to ensure "12345..." order.

        Args:
            text: The paragraph text to parse.

        Returns:
            A sorted list of unique 0-indexed integers representing image numbers.
            Returns an empty list if no valid references are found.
        """
        unique_image_indices = set()
        matches = re.findall(r'\[Image\s*((?:\d+\s*(?:,\s*\d+\s*)*))\]', text)
        for match_str in matches:
            for num_str in match_str.split(','):
                try:
                    img_num = int(num_str.strip()) - 1 # Adjust to 0-indexed
                    if 0 <= img_num < len(self.image_frames_base64):
                        unique_image_indices.add(img_num)
                    else:
                        print(f"Warning: Image index {img_num + 1} out of bounds. Skipping in block '{match_str}'.")
                except ValueError:
                    print(f"Warning: Could not parse image number from '{num_str.strip()}' in block '{match_str}'. Skipping.")

        # Return a sorted list of unique indices to maintain order (1, 2, 3, ...)
        return sorted(list(unique_image_indices))

    def _process_single_image_for_display(self, image_index: int) -> str | None:
        """
        Loads an image, resizes it to the target_image_size, annotates it with its
        1-indexed display number, and returns its base64 encoding.

        Args:
            image_index: The 0-indexed position of the image in self.image_frames_base64.

        Returns:
            A base64 encoded string of the resized and annotated image in PNG format,
            or None if an error occurs.
        """
        try:
            img_data_b64 = self.image_frames_base64[image_index]
            img = Image.open(io.BytesIO(base64.b64decode(img_data_b64))).convert("RGB")

            # Resize the image to the fixed target size (840x840)
            img = img.resize(self.target_image_size, Image.LANCZOS)

            draw = ImageDraw.Draw(img)
            # Use 1-indexed number for display
            text_to_draw = f"{image_index + 1}" 
            
            # Get text bounding box for precise positioning
            try:
                # Use textbbox for PIL 9.2.0+
                bbox = draw.textbbox((0, 0), text_to_draw, font=self.font)
                text_width = bbox[2] - bbox[0]
                text_height = bbox[3] - bbox[1]
            except AttributeError:
                # Fallback for older PIL versions (less accurate)
                text_width, text_height = draw.textsize(text_to_draw, font=self.font)

            # Position text at the top-left corner with a small padding
            padding = 10 # Increased padding for larger image
            text_x = padding
            text_y = padding
            
            draw.text((text_x, text_y), text_to_draw, font=self.font, fill=self.text_color)
            
            buffered = io.BytesIO()
            img.save(buffered, format="PNG")
            return base64.b64encode(buffered.getvalue()).decode('utf-8')

        except (IndexError, TypeError, ValueError, IOError) as e:
            print(f"Error processing image at index {image_index} for annotation: {e}. Skipping.")
            return None

    def _get_image_markdown_tag(self, base64_image_data: str, alt_text: str = "Annotated Image") -> str:
        """
        Generates a Markdown image tag from base64 data.
        """
        return f"![{alt_text}](data:image/png;base64,{base64_image_data})"


    def process_readme(self, readme_text: str, image_frames_base64: list[str]) -> str:
        """
        Processes the input README text, finds unique image references in each paragraph,
        resizes and annotates each image, and embeds them as individual Markdown images
        below the respective paragraph. Images are ordered by their original index,
        and displayed with a maximum of two images per line.

        Args:
            readme_text: The full README text content.
            image_frames_base64: A list of base64 encoded strings, representing image frames.

        Returns:
            The processed README text with embedded images.
        """
        processed_lines = []
        self.image_frames_base64 = image_frames_base64
        
        paragraphs = readme_text.split('\n\n')

        for paragraph in paragraphs:
            processed_lines.append(paragraph) # Add the original paragraph

            # Get all unique, sorted image indices for this paragraph
            unique_image_indices = self._parse_image_references(paragraph)

            if unique_image_indices:
                image_tags_for_paragraph = []
                for img_idx in unique_image_indices:
                    annotated_b64 = self._process_single_image_for_display(img_idx)
                    if annotated_b64:
                        image_tags_for_paragraph.append(self._get_image_markdown_tag(annotated_b64, f"Image {img_idx + 1}"))
                    else:
                        print(f"Could not process image {img_idx + 1} for paragraph: '{paragraph[:50]}...'")

                # Arrange image tags with max 2 per line
                if image_tags_for_paragraph:
                    processed_lines.append("\n") # Add a newline before image block
                    for i in range(0, len(image_tags_for_paragraph), 2):
                        row_images = image_tags_for_paragraph[i:i+2]
                        # Join with a space to place images side-by-side in Markdown
                        
                        #comment
                        # processed_lines.append(" ".join(row_images)) 
                        # Add an empty string (newline) after each row of images,
                        # to ensure the next row or paragraph starts on a new line.
                        # This avoids issues if the last row only has one image.
                        processed_lines.append("") 
            
            # Ensure there's always an empty line after a paragraph block
            # This handles cases where there were no images for the paragraph
            # or if the last image row already added a newline.
            if processed_lines and processed_lines[-1] != "":
                processed_lines.append("")

        return "\n".join(processed_lines).strip()

# --- Example Usage ---
if __name__ == "__main__":
    # Demo: build a few solid-colour frames, run three sample texts through
    # ReadmeProcessor, print each result, and write the first one to disk.

    def create_dummy_image_b64(width, height, color):
        """Return a base64-encoded PNG of a solid `color` image of the given size."""
        img = Image.new('RGB', (width, height), color)
        buffered = io.BytesIO()
        img.save(buffered, format="PNG")
        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    # Simulate 6 image frames with varying original sizes; the processor
    # resizes every frame to its own fixed target size.
    dummy_frames = [
        create_dummy_image_b64(150, 80, (255, 200, 200)),   # Light Red
        create_dummy_image_b64(220, 160, (200, 255, 200)),  # Light Green
        create_dummy_image_b64(180, 140, (200, 200, 255)),  # Light Blue
        create_dummy_image_b64(250, 170, (255, 255, 200)),  # Light Yellow
        create_dummy_image_b64(190, 155, (200, 255, 255)),  # Light Cyan
        create_dummy_image_b64(210, 145, (255, 200, 255)),  # Light Magenta
    ]

    # Sample text: includes a duplicate reference ([Image 1, 1, ...]) and a
    # plain citation ([0]) that is not an image reference.
    readme_content = """
The earliest verifiable publication of this video is on November 13, 2023. The video was posted to the Telegram channel @SerajSat at 11:03 AM [0]. This represents the first known instance of the video's circulation based on the provided sources.

The visual information within the keyframes shows a daytime scene with diffuse lighting, likely due to heavy smoke, dust, or fog, which obscures clear shadows for a more precise time-of-day analysis [Image 1, 1, 2, 6]. The presence of numerous Palestine Red Crescent Society (PRCS) ambulances gathered outside a building suggests an emergency situation [Image 3].

Given that the video was published on November 13, 2023, it is highly probable that the event depicted was filmed on or very shortly before that date and time. This conclusion is further supported by additional visual cues [Image 4, 5].
"""

    # The frames are passed per call, so one processor serves all examples.
    processor = ReadmeProcessor()

    # Process the README content
    processed_readme = processor.process_readme(readme_content, dummy_frames)

    # Print the result (this would typically be saved to a .md file or displayed)
    print("--- Processed README Content ---")
    print(processed_readme)

    # Save the processed README content to a file. The encoding is explicit so
    # the output does not depend on the platform's default locale encoding.
    with open("processed_readme.md", "w", encoding="utf-8") as f:
        f.write(processed_readme)

    # Example with no image references
    print("\n--- Processed README Content (No Images) ---")
    no_image_readme = "This paragraph has no images. Another paragraph follows."
    processed_no_image_readme = processor.process_readme(no_image_readme, dummy_frames)
    print(processed_no_image_readme)

    # Example with out-of-bounds image reference
    print("\n--- Processed README Content (Out-of-bounds Image) ---")
    out_of_bounds_readme = "This paragraph references an image that doesn't exist [Image 99]."
    processed_out_of_bounds_readme = processor.process_readme(out_of_bounds_readme, dummy_frames)
    print(processed_out_of_bounds_readme)