Update page_files/categorized/Backend/Pdf_ImageExtraction.py
Browse files
page_files/categorized/Backend/Pdf_ImageExtraction.py
CHANGED
|
@@ -89,9 +89,8 @@ def robust_json_load(text: str):
|
|
| 89 |
return parsed
|
| 90 |
|
| 91 |
|
| 92 |
-
def get_plot_data_from_llm(
|
| 93 |
-
sample_file =
|
| 94 |
-
|
| 95 |
prompt = prompt = """
|
| 96 |
Analyze this PDF and identify ONLY true data plots:
|
| 97 |
(Line, Scatter, Bar, Histogram, Heatmap, Box, Violin).
|
|
@@ -144,11 +143,11 @@ IMPORTANT: Only include items where figure_kind="plot".
|
|
| 144 |
}
|
| 145 |
|
| 146 |
# Some environments accept response_mime_type; if yours errors, remove it.
|
| 147 |
-
response =
|
| 148 |
-
|
| 149 |
-
|
|
|
|
| 150 |
)
|
| 151 |
-
|
| 152 |
data = robust_json_load(response.text)
|
| 153 |
|
| 154 |
# Hard filter if model still returns nonplots
|
|
@@ -348,10 +347,9 @@ def main():
|
|
| 348 |
tmp.write(uploaded_file.getbuffer())
|
| 349 |
st.session_state.temp_pdf = tmp.name
|
| 350 |
|
| 351 |
-
genai.
|
| 352 |
-
model_name = get_available_model(api_key)
|
| 353 |
-
|
| 354 |
-
st.session_state.raw_data = get_plot_data_from_llm(active_model, st.session_state.temp_pdf)
|
| 355 |
|
| 356 |
# Results Display
|
| 357 |
if st.session_state.raw_data and st.session_state.temp_pdf:
|
|
|
|
| 89 |
return parsed
|
| 90 |
|
| 91 |
|
| 92 |
+
def get_plot_data_from_llm(client, model_name: str, pdf_path: str):
|
| 93 |
+
sample_file = client.files.upload(path=pdf_path)
|
|
|
|
| 94 |
prompt = prompt = """
|
| 95 |
Analyze this PDF and identify ONLY true data plots:
|
| 96 |
(Line, Scatter, Bar, Histogram, Heatmap, Box, Violin).
|
|
|
|
| 143 |
}
|
| 144 |
|
| 145 |
# Some environments accept response_mime_type; if yours errors, remove it.
|
| 146 |
+
response = client.models.generate_content(
|
| 147 |
+
model=model_name,
|
| 148 |
+
contents=[sample_file, prompt],
|
| 149 |
+
config={"temperature": 0.0, "max_output_tokens": 4096}
|
| 150 |
)
|
|
|
|
| 151 |
data = robust_json_load(response.text)
|
| 152 |
|
| 153 |
# Hard filter if model still returns nonplots
|
|
|
|
| 347 |
tmp.write(uploaded_file.getbuffer())
|
| 348 |
st.session_state.temp_pdf = tmp.name
|
| 349 |
|
| 350 |
+
client = genai.Client(api_key=api_key)
|
| 351 |
+
model_name = get_available_model(api_key)
|
| 352 |
+
st.session_state.raw_data = get_plot_data_from_llm(client, model_name, st.session_state.temp_pdf)
|
|
|
|
| 353 |
|
| 354 |
# Results Display
|
| 355 |
if st.session_state.raw_data and st.session_state.temp_pdf:
|