Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ from huggingface_hub import hf_hub_download
|
|
| 4 |
from transformers import AutoImageProcessor, TableTransformerForObjectDetection
|
| 5 |
import torch
|
| 6 |
from PIL import Image
|
|
|
|
| 7 |
|
| 8 |
# Model and Image Processor Loading (ideally at the app start)
|
| 9 |
@st.cache_resource
|
|
@@ -15,17 +16,25 @@ def load_assets():
|
|
| 15 |
|
| 16 |
file_path, image_processor, model = load_assets()
|
| 17 |
|
|
|
|
| 18 |
# App Title
|
| 19 |
-
st.title(" Detection in
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
inputs = image_processor(images=image, return_tensors="pt")
|
| 28 |
-
outputs = model(**inputs)
|
| 29 |
|
| 30 |
target_sizes = torch.tensor([image.size[::-1]])
|
| 31 |
results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[0]
|
|
|
|
| 4 |
from transformers import AutoImageProcessor, TableTransformerForObjectDetection
|
| 5 |
import torch
|
| 6 |
from PIL import Image
|
| 7 |
+
import fitz # Import PyMuPDF (fitz)
|
| 8 |
|
| 9 |
# Model and Image Processor Loading (ideally at the app start)
|
| 10 |
@st.cache_resource
|
|
|
|
| 16 |
|
| 17 |
file_path, image_processor, model = load_assets()
|
| 18 |
|
| 19 |
+
|
| 20 |
# App Title
|
| 21 |
+
st.title("Table Detection in Documents")
|
| 22 |
+
|
| 23 |
+
# Document Upload
|
| 24 |
+
# Upload widget for the document whose pages are scanned for tables.
# NOTE(review): "docx" and "doc" are accepted here, but the upload is
# subsequently opened with fitz.open(..., filetype="pdf"). Confirm that
# non-PDF uploads are actually handled, or restrict this list to "pdf".
uploaded_file = st.file_uploader("Upload a document", type=["pdf", "docx", "doc"]) # Add more formats if needed
|
| 25 |
+
|
| 26 |
+
# Process Document and Display Results
|
| 27 |
+
if uploaded_file:
|
| 28 |
+
doc = fitz.open(stream=uploaded_file.getvalue(), filetype="pdf") # Open as PDF
|
| 29 |
|
| 30 |
+
for page_index in range(len(doc)):
|
| 31 |
+
page = doc.load_page(page_index)
|
| 32 |
+
pix = page.get_pixmap()
|
| 33 |
+
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 34 |
|
| 35 |
+
# Table Detection (your existing logic)
|
| 36 |
+
inputs = image_processor(images=image, return_tensors="pt")
|
| 37 |
+
outputs = model(**inputs)
|
|
|
|
|
|
|
| 38 |
|
| 39 |
target_sizes = torch.tensor([image.size[::-1]])
|
| 40 |
results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[0]
|