File size: 5,470 Bytes
cba2c8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# import base64
# import os
# import requests

# API_URL = "https://j103s9d2e4dccfoc.aistudio-app.com/layout-parsing"
# TOKEN = "3f77a7dbc0756fef96d9d6791f485b062106ef51"

# # file_path = r"C:\Users\shiva\OneDrive\Desktop\ip.pdf"

# # ---------- READ PDF ----------
# def VL_model(file_path: str):
#     with open(file_path, "rb") as file:
#         file_bytes = file.read()
#         file_data = base64.b64encode(file_bytes).decode("ascii")

#     headers = {
#         "Authorization": f"token {TOKEN}",
#         "Content-Type": "application/json"
#     }

#     payload = {
#         "file": file_data,
#         "fileType": 0,
#         "useDocOrientationClassify": False,
#         "useDocUnwarping": False,
#         "useChartRecognition": False,
#     }

#     response = requests.post(API_URL, json=payload, headers=headers)
#     assert response.status_code == 200

#     result = response.json()["result"]

#     # ---------- OUTPUT ----------
#     output_dir = "img2"
#     os.makedirs(output_dir, exist_ok=True)

#     FINAL_MD_PATH = os.path.join(output_dir, "final_document.md")

#     with open(FINAL_MD_PATH, "w", encoding="utf-8") as final_md:

#         for i, res in enumerate(result["layoutParsingResults"]):

#             # Page separator (VERY IMPORTANT for parsing later)
#             final_md.write(f"\n\n---\n## PAGE {i+1}\n---\n\n")

#             # Append markdown text
#             final_md.write(res["markdown"]["text"])
#             final_md.write("\n")

#             # Save embedded images
#             for img_path, img_url in res["markdown"]["images"].items():
#                 full_img_path = os.path.join(output_dir, img_path)
#                 os.makedirs(os.path.dirname(full_img_path), exist_ok=True)

#                 img_bytes = requests.get(img_url).content
#                 with open(full_img_path, "wb") as img_file:
#                     img_file.write(img_bytes)

#             # Save detected layout images (figures, tables)
#             for img_name, img_url in res["outputImages"].items():
#                 img_response = requests.get(img_url)
#                 if img_response.status_code == 200:
#                     filename = os.path.join(output_dir, f"{img_name}_page{i+1}.jpg")
#                     with open(filename, "wb") as f:
#                         f.write(img_response.content)

#     print(f"βœ… Single merged markdown saved at: {FINAL_MD_PATH}")
#     return FINAL_MD_PATH
import base64
import os
import requests

API_URL = "https://j103s9d2e4dccfoc.aistudio-app.com/layout-parsing"

from dotenv import load_dotenv
load_dotenv()
TOKEN = os.getenv("TOKEN")

def VL_model(file_path: str, query_id: int):
    # ---------- READ PDF ----------
    with open(file_path, "rb") as file:
        file_data = base64.b64encode(file.read()).decode("ascii")

    headers = {
        "Authorization": f"token {TOKEN}",
        "Content-Type": "application/json"
    }

    payload = {
        "file": file_data,
        "fileType": 0,
        "useDocOrientationClassify": False,
        "useDocUnwarping": False,
        "useChartRecognition": False,
    }

    response = requests.post(API_URL, json=payload, headers=headers)
    response.raise_for_status()
    result = response.json()["result"]

    # ---------- OUTPUT STRUCTURE ----------
    BASE_DIR = "vl_output_bro"
    IMG_DIR = os.path.join(BASE_DIR, "imgs")
    QUERY_DIR = os.path.join(BASE_DIR, f"query_{query_id}")

    os.makedirs(IMG_DIR, exist_ok=True)
    os.makedirs(QUERY_DIR, exist_ok=True)

    FINAL_MD_PATH = os.path.join(QUERY_DIR, "final_document.md")

    with open(FINAL_MD_PATH, "w", encoding="utf-8") as final_md:

        for page_no, res in enumerate(result["layoutParsingResults"], start=1):

            # ---------- PAGE HEADER ----------
            final_md.write(f"\n\n---\n## PAGE {page_no}\n---\n\n")
            final_md.write(res["markdown"]["text"])
            final_md.write("\n")

            # ---------- EMBEDDED MARKDOWN IMAGES ----------
            for img_path, img_url in res["markdown"]["images"].items():
                # βœ… USE EXACT SAME IMAGE NAME AS HTML
                img_name = os.path.basename(img_path)
                img_file_path = os.path.join(IMG_DIR, img_name)

                if not os.path.exists(img_file_path):
                    img_bytes = requests.get(img_url).content
                    with open(img_file_path, "wb") as img_file:
                        img_file.write(img_bytes)

                # βœ… RELATIVE PATH FROM query_X β†’ imgs
                final_md.write(f"\n![img](../imgs/{img_name})\n")

            # ---------- DETECTED LAYOUT IMAGES ----------
            for img_name, img_url in res["outputImages"].items():
                img_name = f"{img_name}.jpg"
                img_file_path = os.path.join(IMG_DIR, img_name)

                if not os.path.exists(img_file_path):
                    img_bytes = requests.get(img_url).content
                    with open(img_file_path, "wb") as f:
                        f.write(img_bytes)

    print(f"βœ… VL markdown saved at: {FINAL_MD_PATH}")
    print(f"βœ… Images saved in: {IMG_DIR}")
    return FINAL_MD_PATH
# VL_model(file_path=r"C:\\Users\\shiva\\OneDrive\\Desktop\\mini 2.0\\queries\\1c835fef22ef433288d92f00f24f21aa_daa.pdf", query_id=1)