AbhijitClemson committed on
Commit
b2afd40
·
verified ·
1 Parent(s): 1d61a58

Update page_files/categorized/Backend/Pdf_ImageExtraction.py

Browse files
page_files/categorized/Backend/Pdf_ImageExtraction.py CHANGED
@@ -89,9 +89,8 @@ def robust_json_load(text: str):
89
  return parsed
90
 
91
 
92
- def get_plot_data_from_llm(GEMINI_MODEL, pdf_path: str):
93
- sample_file = genai.upload_file(path=pdf_path)
94
-
95
  prompt = prompt = """
96
  Analyze this PDF and identify ONLY true data plots:
97
  (Line, Scatter, Bar, Histogram, Heatmap, Box, Violin).
@@ -144,11 +143,11 @@ IMPORTANT: Only include items where figure_kind="plot".
144
  }
145
 
146
  # Some environments accept response_mime_type; if yours errors, remove it.
147
- response = GEMINI_MODEL.generate_content(
148
- [sample_file, prompt],
149
- generation_config=generation_config
 
150
  )
151
-
152
  data = robust_json_load(response.text)
153
 
154
  # Hard filter if model still returns nonplots
@@ -348,10 +347,9 @@ def main():
348
  tmp.write(uploaded_file.getbuffer())
349
  st.session_state.temp_pdf = tmp.name
350
 
351
- genai.configure(api_key=api_key)
352
- model_name = get_available_model(api_key)
353
- active_model = genai.GenerativeModel(model_name)
354
- st.session_state.raw_data = get_plot_data_from_llm(active_model, st.session_state.temp_pdf)
355
 
356
  # Results Display
357
  if st.session_state.raw_data and st.session_state.temp_pdf:
 
89
  return parsed
90
 
91
 
92
+ def get_plot_data_from_llm(client, model_name: str, pdf_path: str):
93
+ sample_file = client.files.upload(path=pdf_path)
 
94
  prompt = prompt = """
95
  Analyze this PDF and identify ONLY true data plots:
96
  (Line, Scatter, Bar, Histogram, Heatmap, Box, Violin).
 
143
  }
144
 
145
  # Some environments accept response_mime_type; if yours errors, remove it.
146
+ response = client.models.generate_content(
147
+ model=model_name,
148
+ contents=[sample_file, prompt],
149
+ config={"temperature": 0.0, "max_output_tokens": 4096}
150
  )
 
151
  data = robust_json_load(response.text)
152
 
153
  # Hard filter if model still returns nonplots
 
347
  tmp.write(uploaded_file.getbuffer())
348
  st.session_state.temp_pdf = tmp.name
349
 
350
+ client = genai.Client(api_key=api_key)
351
+ model_name = get_available_model(api_key)
352
+ st.session_state.raw_data = get_plot_data_from_llm(client, model_name, st.session_state.temp_pdf)
 
353
 
354
  # Results Display
355
  if st.session_state.raw_data and st.session_state.temp_pdf: