prthm11 committed on
Commit
cabfc89
·
verified ·
1 Parent(s): 85c969e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -204
app.py CHANGED
@@ -1652,35 +1652,46 @@ scratch_keywords = [
1652
 
1653
  #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
1654
  #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path):
1655
- def extract_images_from_pdf(pdf_path: Path):
 
 
 
 
 
1656
  ''' Extract images from PDF and generate structured sprite JSON '''
1657
  try:
1658
- pdf_path = Path(pdf_path)
1659
- pdf_filename = pdf_path.stem # e.g., "scratch_crab"
1660
- pdf_dir_path = str(pdf_path.parent).replace("/", "\\")
1661
- print("-------------------------------pdf_filename-------------------------------",pdf_filename)
1662
- print("-------------------------------pdf_dir_path-------------------------------",pdf_dir_path)
1663
- # Create subfolders under the provided base directories
1664
- # This will create paths like:
1665
- # /app/detected_images/pdf_filename/
1666
- # /app/json_data/pdf_filename/
1667
- extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
1668
- json_subdir = JSON_DIR / pdf_filename
1669
- extracted_image_subdir.mkdir(parents=True, exist_ok=True)
1670
- json_subdir.mkdir(parents=True, exist_ok=True)
1671
- print("-------------------------------extracted_image_subdir-------------------------------",extracted_image_subdir)
1672
- print("-------------------------------json_subdir-------------------------------",json_subdir)
1673
- # Output paths (now using Path objects directly)
1674
- output_json_path = json_subdir / "extracted.json"
1675
- final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
1676
- final_json_path_2 = json_subdir / "extracted_sprites_2.json"
1677
- print("-------------------------------output_json_path-------------------------------",output_json_path)
1678
- print("-------------------------------final_json_path-------------------------------",final_json_path)
1679
- print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
 
 
 
 
1680
 
 
1681
  try:
1682
  elements = partition_pdf(
1683
- filename=str(pdf_path), # partition_pdf might expect a string
 
1684
  strategy="hi_res",
1685
  extract_image_block_types=["Image"],
1686
  hi_res_model_name="yolox",
@@ -1691,151 +1702,39 @@ def extract_images_from_pdf(pdf_path: Path):
1691
  raise RuntimeError(
1692
  f"❌ Failed to extract images from PDF: {str(e)}")
1693
 
1694
- try:
1695
- with open(output_json_path, "w") as f:
1696
- json.dump([element.to_dict()
1697
- for element in elements], f, indent=4)
1698
- except Exception as e:
1699
- raise RuntimeError(f"❌ Failed to write extracted.json: {str(e)}")
1700
-
1701
- try:
1702
- # Display extracted images
1703
- with open(output_json_path, 'r') as file:
1704
- file_elements = json.load(file)
1705
- except Exception as e:
1706
- raise RuntimeError(f"❌ Failed to read extracted.json: {str(e)}")
1707
-
1708
- # Prepare manipulated sprite JSON structure
1709
- manipulated_json = {}
1710
-
1711
- # SET A SYSTEM PROMPT
1712
- system_prompt = """
1713
- You are an expert in visual scene understanding.
1714
- Your Job is to analyze an image and respond acoording if asked for name give simple name by analyzing it and if ask for descrption generate a short description covering its elements.
 
 
1715
 
1716
- Guidelines:
1717
- - Focus only the images given in Square Shape.
1718
- - Don't Consider Blank areas in Image as.
1719
- - Don't include generic summary or explanation outside the fields.
1720
- Return only string.
1721
- """
1722
- agent = create_react_agent(
1723
- model=llm,
1724
- tools=[],
1725
- prompt=system_prompt
1726
- )
1727
-
1728
- # If JSON already exists, load it and find the next available Sprite number
1729
- if final_json_path.exists(): # Use Path.exists()
1730
- with open(final_json_path, "r") as existing_file:
1731
- manipulated = json.load(existing_file)
1732
- # Determine the next available index (e.g., Sprite 4 if 1–3 already exist)
1733
- existing_keys = [int(k.replace("Sprite ", ""))
1734
- for k in manipulated.keys()]
1735
- start_count = max(existing_keys, default=0) + 1
1736
- else:
1737
- start_count = 1
1738
-
1739
- sprite_count = start_count
1740
- for i, element in enumerate(file_elements):
1741
- if "image_base64" in element["metadata"]:
1742
- try:
1743
- image_data = base64.b64decode(
1744
- element["metadata"]["image_base64"])
1745
- print(f"\n ------------------------------image_data: {image_data}")
1746
- image = Image.open(BytesIO(image_data)).convert("RGB") # Use BytesIO here
1747
-
1748
- image = upscale_image(image, scale=2)
1749
- # image.show(title=f"Extracted Image {i+1}")
1750
-
1751
- # MODIFIED: Store image directly to BytesIO to avoid saving to disk if not needed
1752
- # and then converting back to base64.
1753
- img_buffer = BytesIO()
1754
- image.save(img_buffer, format="PNG")
1755
- img_bytes = img_buffer.getvalue()
1756
- img_base64 = base64.b64encode(img_bytes).decode("utf-8")
1757
- print(f"\n------------------------------------------------Image_Base64: {img_base64}")
1758
- # Optionally save image to disk if desired for debugging/permanent storage
1759
- image_path = extracted_image_subdir / f"Sprite_{i+1}.png"
1760
- image.save(image_path)
1761
-
1762
- prompt_combined = """
1763
- Analyze this image and return JSON with keys:# modify prompt for "name", if it detects "code-blocks only then give name as 'scratch-block'"
1764
- {
1765
- "name": "<short name or 'scratch blocks'>" ,
1766
- "description": "<short description>"
1767
- }
1768
- Guidelines:
1769
- - If image contains logical/code blocks from Scratch (e.g., move, turn, repeat, when clicked, etc.), use 'scratch-block' as the name.
1770
- - If image is a character, object, or backdrop, give an appropriate descriptive name instead.
1771
- - Avoid generic names like 'image1' or 'picture'.
1772
- - Keep the response strictly in JSON format.
1773
- """
1774
-
1775
- content = [
1776
- {"type": "text", "text": prompt_combined},
1777
- {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"}}
1778
- ]
1779
-
1780
- response = agent.invoke({"messages": [{"role": "user", "content": content}]})
1781
-
1782
- # Ensure response is handled correctly, it might be a string that needs json.loads
1783
- try:
1784
- # Assuming the agent returns a dictionary with 'messages' key,
1785
- # and the last message's content is the JSON string.
1786
- response_content_str = response.get("messages", [])[-1].content
1787
- result_json = json.loads(response_content_str)
1788
- except (json.JSONDecodeError, IndexError, AttributeError) as e:
1789
- logger.error(f"⚠️ Failed to parse agent response as JSON: {e}. Response was: {response}", exc_info=True)
1790
- result_json = {} # Default to empty dict if parsing fails
1791
-
1792
- try:
1793
- name = result_json.get("name", "").strip()
1794
- description = result_json.get("description", "").strip()
1795
- except Exception as e:
1796
- logger.error(f"⚠️ Failed to extract name/description from result_json: {str(e)}", exc_info=True)
1797
- name = "unknown"
1798
- description = "unknown"
1799
-
1800
- manipulated_json[f"Sprite {sprite_count}"] = {
1801
- "name": name,
1802
- "base64": element["metadata"]["image_base64"],
1803
- "file-path": pdf_dir_path,
1804
- "description": description
1805
- }
1806
- print(f"\n ------------------elemente: {element['metadata']['image_base64']}")
1807
- print(f"\n ------------------pdf_dir_path: {pdf_dir_path}")
1808
- sprite_count += 1
1809
- print(f"\n===================manipulated JSON: {manipulated_json}")
1810
- except Exception as e:
1811
- logger.error(f"⚠️ Error processing Sprite {i+1}: {str(e)}", exc_info=True)
1812
-
1813
- # Save manipulated JSON
1814
- with open(final_json_path, "w") as sprite_file:
1815
- json.dump(manipulated_json, sprite_file, indent=4)
1816
-
1817
- def is_code_block(name: str) -> bool:
1818
- for kw in scratch_keywords:
1819
- if kw.lower() in name.lower():
1820
- return True
1821
- return False
1822
-
1823
- # Filter out code block images
1824
- filtered_sprites = {}
1825
- for key, value in manipulated_json.items():
1826
- sprite_name = value.get("name", "")
1827
- if not is_code_block(sprite_name):
1828
- filtered_sprites[key] = value
1829
- else:
1830
- logger.info(f"🛑 Excluded code block-like image: {key}")
1831
-
1832
- # Overwrite with filtered content
1833
- with open(final_json_path_2, "w") as sprite_file:
1834
- json.dump(filtered_sprites, sprite_file, indent=4)
1835
-
1836
- # MODIFIED RETURN VALUE: Return the Path to the primary extracted_sprites.json file
1837
- # and the directory where it's located.
1838
- return final_json_path, json_subdir # Return the file path and its parent directory
1839
  except Exception as e:
1840
  raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
1841
 
@@ -1857,12 +1756,20 @@ def similarity_matching(input_json_path: str, project_folder: str) -> str:
1857
  with open(input_json_path, 'r') as f:
1858
  sprites_data = json.load(f)
1859
 
1860
- sprite_ids, texts, sprite_base64 = [], [], []
1861
  for sid, sprite in sprites_data.items():
1862
  sprite_ids.append(sid)
1863
- texts.append("This is " + sprite.get("description", sprite.get("name", "")))
1864
  sprite_base64.append(sprite["base64"])
1865
 
 
 
 
 
 
 
 
 
1866
  # =========================================
1867
  # Build the list of all candidate images
1868
  # =========================================
@@ -1891,24 +1798,31 @@ def similarity_matching(input_json_path: str, project_folder: str) -> str:
1891
  # -----------------------------------------
1892
  with open(f"{BLOCKS_DIR}/embeddings.json", "r") as f:
1893
  embedding_json = json.load(f)
1894
- img_matrix = np.array([img["embeddings"] for img in embedding_json])
1895
 
1896
  # =========================================
1897
  # Decode & embed each sprite image
1898
  # =========================================
1899
- sprite_features = []
1900
- for b64 in sprite_base64:
1901
- if "," in b64:
1902
- b64 = b64.split(",", 1)[1]
1903
- img_bytes = base64.b64decode(b64)
1904
- pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
1905
- buf = BytesIO()
1906
- pil_img.save(buf, format="PNG")
1907
- buf.seek(0)
1908
- feats = clip_embd.embed_image([buf])[0]
1909
- sprite_features.append(feats)
 
 
 
 
 
 
 
1910
  sprite_matrix = np.vstack(sprite_features)
1911
-
 
1912
  # =========================================
1913
  # Compute similarities & pick best match
1914
  # =========================================
@@ -2270,19 +2184,22 @@ def create_sb3_archive(project_folder, project_id):
2270
  os.remove(sb3_path)
2271
  return sb3_path
2272
 
2273
- def save_pdf_to_generated_dir(pdf_path: str, project_id: str) -> str:
 
 
2274
  """
2275
- Copies the PDF at `pdf_path` into GEN_PROJECT_DIR/project_id/,
2276
  renaming it to <project_id>.pdf.
2277
 
2278
  Args:
2279
- pdf_path (str): Any existing path to a PDF file.
2280
  project_id (str): Your unique project identifier.
2281
 
2282
  Returns:
2283
  str: Path to the copied PDF in the generated directory,
2284
  or None if something went wrong.
2285
- """
 
2286
  try:
2287
  # 1) Build the destination directory and base filename
2288
  output_dir = GEN_PROJECT_DIR / project_id
@@ -2293,9 +2210,12 @@ def save_pdf_to_generated_dir(pdf_path: str, project_id: str) -> str:
2293
  target_pdf = output_dir / f"{project_id}.pdf"
2294
  print(f"\n--------------------------------target_pdf {target_pdf}")
2295
  # 3) Copy the PDF
2296
- shutil.copy2(pdf_path, target_pdf)
2297
- print(f"Copied PDF from {pdf_path} → {target_pdf}")
2298
- logger.info(f"Copied PDF from {pdf_path} → {target_pdf}")
 
 
 
2299
 
2300
 
2301
  return str(target_pdf)
@@ -2356,22 +2276,35 @@ def process_pdf():
2356
  # Create empty json in project_{random_id} folder #
2357
  # =========================================================================== #
2358
  #os.makedirs(project_folder, exist_ok=True)
2359
-
 
2360
  # Save the uploaded PDF temporarily
2361
- filename = secure_filename(pdf_file.filename)
2362
- temp_dir = tempfile.mkdtemp()
2363
- saved_pdf_path = os.path.join(temp_dir, filename)
2364
- pdf_file.save(saved_pdf_path)
2365
- pdf_doc = saved_pdf_path
2366
- pdf= save_pdf_to_generated_dir(saved_pdf_path, project_id)
 
 
 
 
 
 
 
2367
  # logger.info(f"Created project folder: {project_folder}")
2368
- logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
2369
  logger.info(f"Saved uploaded PDF to: {pdf_file}: {pdf}")
2370
- print("--------------------------------pdf_file_path---------------------",pdf_file,saved_pdf_path)
 
 
 
2371
  # Extract & process
2372
- # output_path, result = extract_images_from_pdf(saved_pdf_path, json_path)
2373
- output_path, result = extract_images_from_pdf(saved_pdf_path)
2374
- print(" --------------------------------------- zip_path_str ---------------------------------------", output_path, result)
 
 
2375
  # Check extracted_sprites.json for "scratch block" in any 'name'
2376
  # extracted_dir = os.path.join(JSON_DIR, os.path.splitext(filename)[0])
2377
  # extracted_sprites_json = os.path.join(extracted_dir, "extracted_sprites.json")
@@ -2388,8 +2321,7 @@ def process_pdf():
2388
  with open(project_output, 'r') as f:
2389
  project_skeleton = json.load(f)
2390
 
2391
-
2392
- images = convert_from_path(saved_pdf_path, dpi=300)
2393
  print(type)
2394
  page = images[0]
2395
  # img_base64 = base64.b64encode(images).decode("utf-8")
 
1652
 
1653
  #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path, image_base_dir: Path):
1654
  #def extract_images_from_pdf(pdf_path: Path, json_base_dir: Path):
1655
+
1656
+ # Prepare manipulated sprite JSON structure
1657
+ manipulated_json = {}
1658
+ img_elements = []
1659
+ # { changes: "pdf_stream" in place of "pdf_path"
1660
+ def extract_images_from_pdf(pdf_stream: io.BytesIO):
1661
  ''' Extract images from PDF and generate structured sprite JSON '''
1662
  try:
1663
+ # {
1664
+ # pdf_path = Path(pdf_path)
1665
+ # pdf_filename = pdf_path.stem # e.g., "scratch_crab"
1666
+ # pdf_dir_path = str(pdf_path.parent).replace("/", "\\")
1667
+ # print("-------------------------------pdf_filename-------------------------------",pdf_filename)
1668
+ # print("-------------------------------pdf_dir_path-------------------------------",pdf_dir_path)
1669
+
1670
+ if isinstance(pdf_stream, io.BytesIO):
1671
+ # use a random ID since there's no filename
1672
+ pdf_id = uuid.uuid4().hex
1673
+ else:
1674
+ pdf_id = os.path.splitext(os.path.basename(pdf_stream))[0]
1675
+
1676
+ # extracted_image_subdir = DETECTED_IMAGE_DIR / pdf_filename
1677
+ # json_subdir = JSON_DIR / pdf_filename
1678
+ # extracted_image_subdir.mkdir(parents=True, exist_ok=True)
1679
+ # json_subdir.mkdir(parents=True, exist_ok=True)
1680
+ # print("-------------------------------extracted_image_subdir-------------------------------",extracted_image_subdir)
1681
+ # print("-------------------------------json_subdir-------------------------------",json_subdir)
1682
+ # # Output paths (now using Path objects directly)
1683
+ # output_json_path = json_subdir / "extracted.json"
1684
+ # final_json_path = json_subdir / "extracted_sprites.json" # Path to extracted_sprites.json
1685
+ # final_json_path_2 = json_subdir / "extracted_sprites_2.json"
1686
+ # print("-------------------------------output_json_path-------------------------------",output_json_path)
1687
+ # print("-------------------------------final_json_path-------------------------------",final_json_path)
1688
+ # print("-------------------------------final_json_path_2-------------------------------",final_json_path_2)
1689
 
1690
+ # }
1691
  try:
1692
  elements = partition_pdf(
1693
+ # filename=str(pdf_path), # partition_pdf might expect a string
1694
+ file=pdf_stream, # 'file=' in place of 'filename'
1695
  strategy="hi_res",
1696
  extract_image_block_types=["Image"],
1697
  hi_res_model_name="yolox",
 
1702
  raise RuntimeError(
1703
  f"❌ Failed to extract images from PDF: {str(e)}")
1704
 
1705
+ file_elements = [element.to_dict() for element in elements]
1706
+
1707
+ #{
1708
+ # try:
1709
+ # with open(output_json_path, "w") as f:
1710
+ # json.dump([element.to_dict()
1711
+ # for element in elements], f, indent=4)
1712
+ # except Exception as e:
1713
+ # raise RuntimeError(f"❌ Failed to write extracted.json: {str(e)}")
1714
+
1715
+ # try:
1716
+ # # Display extracted images
1717
+ # with open(output_json_path, 'r') as file:
1718
+ # file_elements = json.load(file)
1719
+ # except Exception as e:
1720
+ # raise RuntimeError(f"❌ Failed to read extracted.json: {str(e)}")
1721
+ # }
1722
+
1723
+ sprite_count = 1
1724
+ for el in file_elements:
1725
+ img_b64 = el["metadata"].get("image_base64")
1726
+ if not img_b64:
1727
+ continue
1728
 
1729
+ manipulated_json[f"Sprite {sprite_count}"] = {
1730
+ # "id":auto_id,
1731
+ # "name": name,
1732
+ "base64": el["metadata"]["image_base64"],
1733
+ "file-path": pdf_id,
1734
+ # "description": description
1735
+ }
1736
+ sprite_count += 1
1737
+ return manipulated_json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1738
  except Exception as e:
1739
  raise RuntimeError(f"❌ Error in extract_images_from_pdf: {str(e)}")
1740
 
 
1756
  with open(input_json_path, 'r') as f:
1757
  sprites_data = json.load(f)
1758
 
1759
+ sprite_ids, sprite_base64 = [], []
1760
  for sid, sprite in sprites_data.items():
1761
  sprite_ids.append(sid)
1762
+ # texts.append("This is " + sprite.get("description", sprite.get("name", "")))
1763
  sprite_base64.append(sprite["base64"])
1764
 
1765
+ sprite_images_bytes = []
1766
+ for b64 in sprite_base64:
1767
+ img = Image.open(BytesIO(base64.b64decode(b64.split(",")[-1]))).convert("RGB")
1768
+ buffer = BytesIO()
1769
+ img.save(buffer, format="PNG")
1770
+ buffer.seek(0)
1771
+ sprite_images_bytes.append(buffer)
1772
+
1773
  # =========================================
1774
  # Build the list of all candidate images
1775
  # =========================================
 
1798
  # -----------------------------------------
1799
  with open(f"{BLOCKS_DIR}/embeddings.json", "r") as f:
1800
  embedding_json = json.load(f)
 
1801
 
1802
  # =========================================
1803
  # Decode & embed each sprite image
1804
  # =========================================
1805
+ # sprite_features = []
1806
+ # for b64 in sprite_base64:
1807
+ # if "," in b64:
1808
+ # b64 = b64.split(",", 1)[1]
1809
+
1810
+ # img_bytes = base64.b64decode(b64)
1811
+ # pil_img = Image.open(BytesIO(img_bytes)).convert("RGB")
1812
+ # buf = BytesIO()
1813
+ # pil_img.save(buf, format="PNG")
1814
+ # buf.seek(0)
1815
+ # feats = clip_embd.embed_image([buf])[0]
1816
+ # sprite_features.append(feats)
1817
+
1818
+ # ============================== #
1819
+ # EMBED SPRITE IMAGES #
1820
+ # ============================== #
1821
+ sprite_features = clip_embd.embed_image(sprite_images_bytes)
1822
+
1823
  sprite_matrix = np.vstack(sprite_features)
1824
+ img_matrix = np.array([img["embeddings"] for img in embedding_json])
1825
+
1826
  # =========================================
1827
  # Compute similarities & pick best match
1828
  # =========================================
 
2184
  os.remove(sb3_path)
2185
  return sb3_path
2186
 
2187
+ #{ changes -> pdf_stream replacement of pdf_path
2188
+ # def save_pdf_to_generated_dir(pdf_path: str, project_id: str) -> str:
2189
+ def save_pdf_to_generated_dir(pdf_stream: io.BytesIO, project_id: str) -> str:
2190
  """
2191
+ Copies the PDF at `pdf_stream` into GEN_PROJECT_DIR/project_id/,
2192
  renaming it to <project_id>.pdf.
2193
 
2194
  Args:
2195
+ pdf_stream (io.BytesIO): Any existing stream to a PDF file.
2196
  project_id (str): Your unique project identifier.
2197
 
2198
  Returns:
2199
  str: Path to the copied PDF in the generated directory,
2200
  or None if something went wrong.
2201
+ """
2202
+ # }
2203
  try:
2204
  # 1) Build the destination directory and base filename
2205
  output_dir = GEN_PROJECT_DIR / project_id
 
2210
  target_pdf = output_dir / f"{project_id}.pdf"
2211
  print(f"\n--------------------------------target_pdf {target_pdf}")
2212
  # 3) Copy the PDF
2213
+ # {
2214
+ # shutil.copy2(pdf_path, target_pdf)
2215
+ shutil.copy2(pdf_stream, target_pdf)
2216
+ print(f"Copied PDF from {pdf_stream} → {target_pdf}")
2217
+ logger.info(f"Copied PDF from {pdf_stream} → {target_pdf}")
2218
+ # }
2219
 
2220
 
2221
  return str(target_pdf)
 
2276
  # Create empty json in project_{random_id} folder #
2277
  # =========================================================================== #
2278
  #os.makedirs(project_folder, exist_ok=True)
2279
+
2280
+ # {
2281
  # Save the uploaded PDF temporarily
2282
+ # filename = secure_filename(pdf_file.filename)
2283
+ # temp_dir = tempfile.mkdtemp()
2284
+ # saved_pdf_path = os.path.join(temp_dir, filename)
2285
+ # pdf_file.save(saved_pdf_path)
2286
+ # pdf_doc = saved_pdf_path
2287
+
2288
+ pdf_bytes = pdf_file.read()
2289
+ pdf_stream = io.BytesIO(pdf_bytes)
2290
+ logger.info(f"Saved uploaded PDF to: {pdf_stream}")
2291
+
2292
+
2293
+ # pdf= save_pdf_to_generated_dir(saved_pdf_path, project_id)
2294
+ pdf= save_pdf_to_generated_dir(pdf_stream, project_id)
2295
  # logger.info(f"Created project folder: {project_folder}")
2296
+ # logger.info(f"Saved uploaded PDF to: {saved_pdf_path}")
2297
  logger.info(f"Saved uploaded PDF to: {pdf_file}: {pdf}")
2298
+ print("--------------------------------pdf_file_path---------------------",pdf_file,pdf_stream)
2299
+ # }
2300
+
2301
+ # {
2302
  # Extract & process
2303
+ # output_path, result = extract_images_from_pdf(saved_pdf_path)
2304
+ output_path = extract_images_from_pdf(pdf_stream)
2305
+ print(" --------------------------------------- zip_path_str ---------------------------------------", output_path)
2306
+ # }
2307
+
2308
  # Check extracted_sprites.json for "scratch block" in any 'name'
2309
  # extracted_dir = os.path.join(JSON_DIR, os.path.splitext(filename)[0])
2310
  # extracted_sprites_json = os.path.join(extracted_dir, "extracted_sprites.json")
 
2321
  with open(project_output, 'r') as f:
2322
  project_skeleton = json.load(f)
2323
 
2324
+ images = convert_from_path(pdf_stream, dpi=300)
 
2325
  print(type)
2326
  page = images[0]
2327
  # img_base64 = base64.b64encode(images).decode("utf-8")