Aadityaramrame committed on
Commit
5ea229f
·
verified ·
1 Parent(s): 7a1ecf6

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +6 -5
pipeline.py CHANGED
@@ -25,7 +25,6 @@ def extract_text_from_pdf(pdf_path):
25
  for page in pages:
26
  text += pytesseract.image_to_string(page) + "\n"
27
  return text.strip()
28
-
29
  def extract_key_values_with_gemini(raw_text, form_type="pancard_form"):
30
  prompt = f"""
31
  You are an intelligent document parser.
@@ -34,13 +33,15 @@ Return strictly as JSON key-value pairs.
34
  Document text:
35
  {raw_text}
36
  """
37
-
38
- # Use the supported model
39
  model = genai.GenerativeModel("models/gemini-2.5-flash")
40
  print("Gemini API called successfully ✅")
41
  response = model.generate_content(prompt)
 
 
 
 
42
 
43
  try:
44
- return json.loads(response.text)
45
  except Exception:
46
- return {"raw_output": response.text}
 
25
  for page in pages:
26
  text += pytesseract.image_to_string(page) + "\n"
27
  return text.strip()
 
28
  def extract_key_values_with_gemini(raw_text, form_type="pancard_form"):
29
  prompt = f"""
30
  You are an intelligent document parser.
 
33
  Document text:
34
  {raw_text}
35
  """
 
 
36
  model = genai.GenerativeModel("models/gemini-2.5-flash")
37
  print("Gemini API called successfully ✅")
38
  response = model.generate_content(prompt)
39
+ text = response.text.strip()
40
+
41
+ # --- Cleanup ---
42
+ text = text.replace("```json", "").replace("```", "").strip()
43
 
44
  try:
45
+ return json.loads(text)
46
  except Exception:
47
+ return {"raw_output": text}