pavansuresh committed on
Commit
4c19d6c
·
verified ·
1 Parent(s): 9f54a59

Update ai_mapping.py

Browse files
Files changed (1) hide show
  1. ai_mapping.py +14 -3
ai_mapping.py CHANGED
@@ -1,7 +1,7 @@
1
  from transformers import LayoutLMv3Tokenizer, LayoutLMv3ForTokenClassification, LayoutLMv3ImageProcessor
2
  import torch
3
  from PIL import Image
4
- import pdf2image
5
  from typing import Dict, List
6
  import os
7
  from huggingface_hub import login
@@ -33,8 +33,14 @@ def extract_key_values_with_layoutlm(text_data: str, pdf_path: str) -> Dict[str,
33
  key_values.update({"Date": dates[0] if dates else "", "Amount": amounts[0] if amounts else ""})
34
 
35
  # Attempt LayoutLMv3 processing
36
- images = pdf2image.convert_from_path(pdf_path)
37
- for i, image in enumerate(images):
 
 
 
 
 
 
38
  encoding = feature_extractor(images=[image], text=text_data.splitlines(), return_tensors="pt")
39
  input_ids = encoding["input_ids"]
40
  attention_mask = encoding["attention_mask"]
@@ -58,6 +64,11 @@ def extract_key_values_with_layoutlm(text_data: str, pdf_path: str) -> Dict[str,
58
  if current_key and current_value:
59
  key_values[current_key] = " ".join(current_value).strip()
60
 
 
 
 
 
 
61
  return key_values if key_values else {"status": "failed", "error": "No key-value pairs extracted", "key_values": {}}
62
  except Exception as e:
63
  return {"status": "failed", "error": str(e), "key_values": {}}
 
1
  from transformers import LayoutLMv3Tokenizer, LayoutLMv3ForTokenClassification, LayoutLMv3ImageProcessor
2
  import torch
3
  from PIL import Image
4
+ import fitz # PyMuPDF
5
  from typing import Dict, List
6
  import os
7
  from huggingface_hub import login
 
33
  key_values.update({"Date": dates[0] if dates else "", "Amount": amounts[0] if amounts else ""})
34
 
35
  # Attempt LayoutLMv3 processing
36
+ doc = fitz.open(pdf_path)
37
+ for page_num in range(len(doc)):
38
+ page = doc[page_num]
39
+ pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72)) # 300 DPI
40
+ img_path = f"{pdf_path}_page_{page_num}.png"
41
+ pix.save(img_path)
42
+ image = Image.open(img_path)
43
+
44
  encoding = feature_extractor(images=[image], text=text_data.splitlines(), return_tensors="pt")
45
  input_ids = encoding["input_ids"]
46
  attention_mask = encoding["attention_mask"]
 
64
  if current_key and current_value:
65
  key_values[current_key] = " ".join(current_value).strip()
66
 
67
+ # Clean up temporary image
68
+ if os.path.exists(img_path):
69
+ os.unlink(img_path)
70
+
71
+ doc.close()
72
  return key_values if key_values else {"status": "failed", "error": "No key-value pairs extracted", "key_values": {}}
73
  except Exception as e:
74
  return {"status": "failed", "error": str(e), "key_values": {}}