Scratch_Vision_Game_v1

Sleeping

App Files Files Community

prthm11 committed on Aug 5, 2025

Commit

cabfc89

verified ·

1 Parent(s): 85c969e

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -204

app.py CHANGED Viewed

@@ -1652,35 +1652,46 @@ scratch_keywords = [
 #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
 #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path):
-def extract_images_from_pdf(pdf_path: Path):
     ''' Extract images from PDF and generate structured sprite JSON '''
     try:
-        pdf_path = Path(pdf_path)
-        pdf_filename = pdf_path.stem               # e.g., "scratch_crab"
-        pdf_dir_path = str(pdf_path.parent).replace("/", "\\")
-        print("-------------------------------pdf_filename-------------------------------",pdf_filename)
-        print("-------------------------------pdf_dir_path-------------------------------",pdf_dir_path)
-        # Create subfolders under the provided base directories
-        # This will create paths like:
-        # /app/detected_images/pdf_filename/
-        # /app/json_data/pdf_filename/
-        extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
-        json_subdir = JSON_DIR / pdf_filename
-        extracted_image_subdir.mkdir(parents=True, exist_ok=True)
-        json_subdir.mkdir(parents=True, exist_ok=True)
-        print("-------------------------------extracted_image_subdir-------------------------------",extracted_image_subdir)
-        print("-------------------------------json_subdir-------------------------------",json_subdir)
-        # Output paths (now using Path objects directly)
-        output_json_path = json_subdir / "extracted.json"
-        final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
-        final_json_path_2 = json_subdir / "extracted_sprites_2.json"
-        print("-------------------------------output_json_path-------------------------------",output_json_path)
-        print("-------------------------------final_json_path-------------------------------",final_json_path)
-        print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
         try:
             elements = partition_pdf(
-                filename=str(pdf_path), # partition_pdf might expect a string
                 strategy="hi_res",
                 extract_image_block_types=["Image"],
                 hi_res_model_name="yolox",
@@ -1691,151 +1702,39 @@ def extract_images_from_pdf(pdf_path: Path):
             raise RuntimeError(
                 f"❌ Failed to extract images from PDF: {str(e)}")
-        try:
-            with open(output_json_path, "w") as f:
-                json.dump([element.to_dict()
-                          for element in elements], f, indent=4)
-        except Exception as e:
-            raise RuntimeError(f"❌ Failed to write extracted.json: {str(e)}")
-        try:
-            # Display extracted images
-            with open(output_json_path, 'r') as file:
-                file_elements = json.load(file)
-        except Exception as e:
-            raise RuntimeError(f"❌ Failed to read extracted.json: {str(e)}")
-        # Prepare manipulated sprite JSON structure
-        manipulated_json = {}
-        # SET A SYSTEM PROMPT
-        system_prompt = """
-            You are an expert in visual scene understanding.
-            Your Job is to analyze an image and respond acoording if asked for name give simple name by analyzing it and if ask for descrption generate a short description covering its elements.
-            Guidelines:
-            - Focus only the images given in Square Shape.
-            - Don't Consider Blank areas in Image as.
-            - Don't include generic summary or explanation outside the fields.
-            Return only string.
-            """
-        agent = create_react_agent(
-            model=llm,
-            tools=[],
-            prompt=system_prompt
-        )
-        # If JSON already exists, load it and find the next available Sprite number
-        if final_json_path.exists(): # Use Path.exists()
-            with open(final_json_path, "r") as existing_file:
-                manipulated = json.load(existing_file)
-            # Determine the next available index (e.g., Sprite 4 if 1–3 already exist)
-            existing_keys = [int(k.replace("Sprite ", ""))
-                             for k in manipulated.keys()]
-            start_count = max(existing_keys, default=0) + 1
-        else:
-            start_count = 1
-        sprite_count = start_count
-        for i, element in enumerate(file_elements):
-            if "image_base64" in element["metadata"]:
-                try:
-                    image_data = base64.b64decode(
-                        element["metadata"]["image_base64"])
-                    print(f"\n ------------------------------image_data: {image_data}")
-                    image = Image.open(BytesIO(image_data)).convert("RGB") # Use BytesIO here
-                    image = upscale_image(image, scale=2)
-                    # image.show(title=f"Extracted Image {i+1}")
-                    # MODIFIED: Store image directly to BytesIO to avoid saving to disk if not needed
-                    # and then converting back to base64.
-                    img_buffer = BytesIO()
-                    image.save(img_buffer, format="PNG")
-                    img_bytes = img_buffer.getvalue()
-                    img_base64 = base64.b64encode(img_bytes).decode("utf-8")
-                    print(f"\n------------------------------------------------Image_Base64: {img_base64}")
-                    # Optionally save image to disk if desired for debugging/permanent storage
-                    image_path = extracted_image_subdir / f"Sprite_{i+1}.png"
-                    image.save(image_path)
-                    prompt_combined = """
-                    Analyze this image and return JSON with keys:# modify prompt for "name", if it detects "code-blocks only then give name as 'scratch-block'"
-                    {
-                    "name": "<short name or 'scratch blocks'>" ,
-                    "description": "<short description>"
-                    }
-                    Guidelines:
-                    - If image contains logical/code blocks from Scratch (e.g., move, turn, repeat, when clicked, etc.), use 'scratch-block' as the name.
-                    - If image is a character, object, or backdrop, give an appropriate descriptive name instead.
-                    - Avoid generic names like 'image1' or 'picture'.
-                    - Keep the response strictly in JSON format.
-                    """
-                    content = [
-                        {"type": "text", "text": prompt_combined},
-                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"}}
-                    ]
-                    response = agent.invoke({"messages": [{"role": "user", "content": content}]})
-                    # Ensure response is handled correctly, it might be a string that needs json.loads
-                    try:
-                        # Assuming the agent returns a dictionary with 'messages' key,
-                        # and the last message's content is the JSON string.
-                        response_content_str = response.get("messages", [])[-1].content
-                        result_json = json.loads(response_content_str)
-                    except (json.JSONDecodeError, IndexError, AttributeError) as e:
-                        logger.error(f"⚠️ Failed to parse agent response as JSON: {e}. Response was: {response}", exc_info=True)
-                        result_json = {} # Default to empty dict if parsing fails
-                    try:
-                        name = result_json.get("name", "").strip()
-                        description = result_json.get("description", "").strip()
-                    except Exception as e:
-                        logger.error(f"⚠️ Failed to extract name/description from result_json: {str(e)}", exc_info=True)
-                        name = "unknown"
-                        description = "unknown"
-                    manipulated_json[f"Sprite {sprite_count}"] = {
-                        "name": name,
-                        "base64": element["metadata"]["image_base64"],
-                        "file-path": pdf_dir_path,
-                        "description": description
-                    }
-                    print(f"\n ------------------elemente: {element['metadata']['image_base64']}")
-                    print(f"\n ------------------pdf_dir_path: {pdf_dir_path}")
-                    sprite_count += 1
-                    print(f"\n===================manipulated JSON: {manipulated_json}")
-                except Exception as e:
-                    logger.error(f"⚠️ Error processing Sprite {i+1}: {str(e)}", exc_info=True)
-        # Save manipulated JSON
-        with open(final_json_path, "w") as sprite_file:
-            json.dump(manipulated_json, sprite_file, indent=4)
-        def is_code_block(name: str) -> bool:
-            for kw in scratch_keywords:
-                if kw.lower() in name.lower():
-                    return True
-            return False
-        # Filter out code block images
-        filtered_sprites = {}
-        for key, value in manipulated_json.items():
-            sprite_name = value.get("name", "")
-            if not is_code_block(sprite_name):
-                filtered_sprites[key] = value
-            else:
-                logger.info(f"🛑 Excluded code block-like image: {key}")
-        # Overwrite with filtered content
-        with open(final_json_path_2, "w") as sprite_file:
-            json.dump(filtered_sprites, sprite_file, indent=4)
-        # MODIFIED RETURN VALUE: Return the Path to the primary extracted_sprites.json file
-        # and the directory where it's located.
-        return final_json_path, json_subdir # Return the file path and its parent directory
     except Exception as e:
         raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
@@ -1857,12 +1756,20 @@ def similarity_matching(input_json_path: str, project_folder: str) -> str:
     with open(input_json_path, 'r') as f:
         sprites_data = json.load(f)
-    sprite_ids, texts, sprite_base64 = [], [], []
     for sid, sprite in sprites_data.items():
         sprite_ids.append(sid)
-        texts.append("This is " + sprite.get("description", sprite.get("name", "")))
         sprite_base64.append(sprite["base64"])
     # =========================================
     #  Build the list of all candidate images
     # =========================================
@@ -1891,24 +1798,31 @@ def similarity_matching(input_json_path: str, project_folder: str) -> str:
     # -----------------------------------------
     with open(f"{BLOCKS_DIR}/embeddings.json", "r") as f:
         embedding_json = json.load(f)
-    img_matrix = np.array([img["embeddings"] for img in embedding_json])
     # =========================================
     #  Decode & embed each sprite image
     # =========================================
-    sprite_features = []
-    for b64 in sprite_base64:
-        if "," in b64:
-            b64 = b64.split(",", 1)[1]
-        img_bytes = base64.b64decode(b64)
-        pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
-        buf = BytesIO()
-        pil_img.save(buf, format="PNG")
-        buf.seek(0)
-        feats = clip_embd.embed_image([buf])[0]
-        sprite_features.append(feats)
     sprite_matrix = np.vstack(sprite_features)
     # =========================================
     #  Compute similarities & pick best match
     # =========================================
@@ -2270,19 +2184,22 @@ def create_sb3_archive(project_folder, project_id):
             os.remove(sb3_path)
         return sb3_path
-def save_pdf_to_generated_dir(pdf_path: str, project_id: str) -> str:
     """
-    Copies the PDF at `pdf_path` into GEN_PROJECT_DIR/project_id/,
     renaming it to <project_id>.pdf.
     Args:
-        pdf_path (str):   Any existing path to a PDF file.
         project_id (str): Your unique project identifier.
     Returns:
         str: Path to the copied PDF in the generated directory,
              or None if something went wrong.
-    """
     try:
         # 1) Build the destination directory and base filename
         output_dir = GEN_PROJECT_DIR / project_id
@@ -2293,9 +2210,12 @@ def save_pdf_to_generated_dir(pdf_path: str, project_id: str) -> str:
         target_pdf = output_dir / f"{project_id}.pdf"
         print(f"\n--------------------------------target_pdf {target_pdf}")
         # 3) Copy the PDF
-        shutil.copy2(pdf_path, target_pdf)
-        print(f"Copied PDF from {pdf_path} → {target_pdf}")
-        logger.info(f"Copied PDF from {pdf_path} → {target_pdf}")
         return str(target_pdf)
@@ -2356,22 +2276,35 @@ def process_pdf():
         #           Create empty json in project_{random_id} folder                   #
         # =========================================================================== #
         #os.makedirs(project_folder, exist_ok=True)
         # Save the uploaded PDF temporarily
-        filename = secure_filename(pdf_file.filename)
-        temp_dir = tempfile.mkdtemp()
-        saved_pdf_path = os.path.join(temp_dir, filename)
-        pdf_file.save(saved_pdf_path)
-        pdf_doc = saved_pdf_path
-        pdf= save_pdf_to_generated_dir(saved_pdf_path, project_id)
         # logger.info(f"Created project folder: {project_folder}")
-        logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
         logger.info(f"Saved uploaded PDF to: {pdf_file}: {pdf}")
-        print("--------------------------------pdf_file_path---------------------",pdf_file,saved_pdf_path)
         # Extract & process
-        # output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)
-        output_path, result = extract_images_from_pdf(saved_pdf_path)
-        print(" --------------------------------------- zip_path_str ---------------------------------------", output_path, result)
         # Check extracted_sprites.json for "scratch block" in any 'name'
         # extracted_dir = os.path.join(JSON_DIR, os.path.splitext(filename)[0])
         # extracted_sprites_json = os.path.join(extracted_dir, "extracted_sprites.json")
@@ -2388,8 +2321,7 @@ def process_pdf():
         with open(project_output, 'r') as f:
             project_skeleton = json.load(f)
-        images = convert_from_path(saved_pdf_path, dpi=300)
         print(type)
         page = images[0]
         # img_base64 = base64.b64encode(images).decode("utf-8")

 #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
 #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path):
+# Prepare manipulated sprite JSON structure
+manipulated_json = {}
+img_elements = []
+# { changed: the function now takes an in-memory "pdf_stream" (io.BytesIO) in place of the filesystem "pdf_path"
+def extract_images_from_pdf(pdf_stream: io.BytesIO):
     ''' Extract images from PDF and generate structured sprite JSON '''
     try:
+        # {
+        # pdf_path = Path(pdf_path)
+        # pdf_filename = pdf_path.stem               # e.g., "scratch_crab"
+        # pdf_dir_path = str(pdf_path.parent).replace("/", "\\")
+        # print("-------------------------------pdf_filename-------------------------------",pdf_filename)
+        # print("-------------------------------pdf_dir_path-------------------------------",pdf_dir_path)
+        if isinstance(pdf_stream, io.BytesIO):
+            # use a random ID since there's no filename
+            pdf_id = uuid.uuid4().hex
+        else:
+            pdf_id = os.path.splitext(os.path.basename(pdf_stream))[0]
+        # extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
+        # json_subdir = JSON_DIR / pdf_filename
+        # extracted_image_subdir.mkdir(parents=True, exist_ok=True)
+        # json_subdir.mkdir(parents=True, exist_ok=True)
+        # print("-------------------------------extracted_image_subdir-------------------------------",extracted_image_subdir)
+        # print("-------------------------------json_subdir-------------------------------",json_subdir)
+        # # Output paths (now using Path objects directly)
+        # output_json_path = json_subdir / "extracted.json"
+        # final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
+        # final_json_path_2 = json_subdir / "extracted_sprites_2.json"
+        # print("-------------------------------output_json_path-------------------------------",output_json_path)
+        # print("-------------------------------final_json_path-------------------------------",final_json_path)
+        # print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
+        # }
         try:
             elements = partition_pdf(
+                # filename=str(pdf_path), # partition_pdf might expect a string
+                file=pdf_stream, # 'file=', inplace of 'filename'
                 strategy="hi_res",
                 extract_image_block_types=["Image"],
                 hi_res_model_name="yolox",
             raise RuntimeError(
                 f"❌ Failed to extract images from PDF: {str(e)}")
+        file_elements = [element.to_dict() for element in elements]
+        #{
+        # try:
+        #     with open(output_json_path, "w") as f:
+        #         json.dump([element.to_dict()
+        #                   for element in elements], f, indent=4)
+        # except Exception as e:
+        #     raise RuntimeError(f"❌ Failed to write extracted.json: {str(e)}")
+        # try:
+        #     # Display extracted images
+        #     with open(output_json_path, 'r') as file:
+        #         file_elements = json.load(file)
+        # except Exception as e:
+        #     raise RuntimeError(f"❌ Failed to read extracted.json: {str(e)}")
+        # }
+        sprite_count = 1
+        for el in file_elements:
+            img_b64 = el["metadata"].get("image_base64")
+            if not img_b64:
+                continue
+            manipulated_json[f"Sprite {sprite_count}"] = {
+                # "id":auto_id,
+                # "name": name,
+                "base64": el["metadata"]["image_base64"],
+                "file-path": pdf_id,
+                # "description": description
+            }
+            sprite_count += 1
+        return manipulated_json
     except Exception as e:
         raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
     with open(input_json_path, 'r') as f:
         sprites_data = json.load(f)
+    sprite_ids, sprite_base64 = [], []
     for sid, sprite in sprites_data.items():
         sprite_ids.append(sid)
+        # texts.append("This is " + sprite.get("description", sprite.get("name", "")))
         sprite_base64.append(sprite["base64"])
+    sprite_images_bytes = []
+    for b64 in sprite_base64:
+        img = Image.open(BytesIO(base64.b64decode(b64.split(",")[-1]))).convert("RGB")
+        buffer = BytesIO()
+        img.save(buffer, format="PNG")
+        buffer.seek(0)
+        sprite_images_bytes.append(buffer)
     # =========================================
     #  Build the list of all candidate images
     # =========================================
     # -----------------------------------------
     with open(f"{BLOCKS_DIR}/embeddings.json", "r") as f:
         embedding_json = json.load(f)
     # =========================================
     #  Decode & embed each sprite image
     # =========================================
+    # sprite_features = []
+    # for b64 in sprite_base64:
+    #     if "," in b64:
+    #         b64 = b64.split(",", 1)[1]
+    #     img_bytes = base64.b64decode(b64)
+    #     pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
+    #     buf = BytesIO()
+    #     pil_img.save(buf, format="PNG")
+    #     buf.seek(0)
+    #     feats = clip_embd.embed_image([buf])[0]
+    #     sprite_features.append(feats)
+    # ============================== #
+    #      EMBED SPRITE IMAGES       #
+    # ============================== #
+    sprite_features = clip_embd.embed_image(sprite_images_bytes)
     sprite_matrix = np.vstack(sprite_features)
+    img_matrix = np.array([img["embeddings"] for img in embedding_json])
     # =========================================
     #  Compute similarities & pick best match
     # =========================================
             os.remove(sb3_path)
         return sb3_path
+#{ changed: the "pdf_stream" parameter replaces "pdf_path"
+# def save_pdf_to_generated_dir(pdf_path: str, project_id: str) -> str:
+def save_pdf_to_generated_dir(pdf_stream: io.BytesIO, project_id: str) -> str:
     """
+    Copies the PDF at `pdf_stream` into GEN_PROJECT_DIR/project_id/,
     renaming it to <project_id>.pdf.
     Args:
+        pdf_stream (io.BytesIO):   Any existing stream to a PDF file.
         project_id (str): Your unique project identifier.
     Returns:
         str: Path to the copied PDF in the generated directory,
              or None if something went wrong.
+    """
+    # }
     try:
         # 1) Build the destination directory and base filename
         output_dir = GEN_PROJECT_DIR / project_id
         target_pdf = output_dir / f"{project_id}.pdf"
         print(f"\n--------------------------------target_pdf {target_pdf}")
         # 3) Copy the PDF
+        # {
+        # shutil.copy2(pdf_path, target_pdf)
+        shutil.copy2(pdf_stream, target_pdf)
+        print(f"Copied PDF from {pdf_stream} → {target_pdf}")
+        logger.info(f"Copied PDF from {pdf_stream} → {target_pdf}")
+        # }
         return str(target_pdf)
         #           Create empty json in project_{random_id} folder                   #
         # =========================================================================== #
         #os.makedirs(project_folder, exist_ok=True)
+        # {
         # Save the uploaded PDF temporarily
+        # filename = secure_filename(pdf_file.filename)
+        # temp_dir = tempfile.mkdtemp()
+        # saved_pdf_path = os.path.join(temp_dir, filename)
+        # pdf_file.save(saved_pdf_path)
+        # pdf_doc = saved_pdf_path
+        pdf_bytes = pdf_file.read()
+        pdf_stream = io.BytesIO(pdf_bytes)
+        logger.info(f"Saved uploaded PDF to: {pdf_stream}")
+        # pdf= save_pdf_to_generated_dir(saved_pdf_path, project_id)
+        pdf= save_pdf_to_generated_dir(pdf_stream, project_id)
         # logger.info(f"Created project folder: {project_folder}")
+        # logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
         logger.info(f"Saved uploaded PDF to: {pdf_file}: {pdf}")
+        print("--------------------------------pdf_file_path---------------------",pdf_file,pdf_stream)
+        # }
+        # {
         # Extract & process
+        # output_path, result = extract_images_from_pdf(saved_pdf_path)
+        output_path = extract_images_from_pdf(pdf_stream)
+        print(" --------------------------------------- zip_path_str ---------------------------------------", output_path)
+        # }
         # Check extracted_sprites.json for "scratch block" in any 'name'
         # extracted_dir = os.path.join(JSON_DIR, os.path.splitext(filename)[0])
         # extracted_sprites_json = os.path.join(extracted_dir, "extracted_sprites.json")
         with open(project_output, 'r') as f:
             project_skeleton = json.load(f)
+        images = convert_from_path(pdf_stream, dpi=300)
         print(type)
         page = images[0]
         # img_base64 = base64.b64encode(images).decode("utf-8")