Spaces:

PrimWong
/

PrimWong-layout_qa_hparam_tuning

Paused

App Files Files Community

PrimWong committed on May 2, 2024

Commit

30d7dbf

verified ·

1 Parent(s): f3cedd0

Upload 11 files

Browse files

Files changed (3) hide show

config.json +132 -0
handler.py +44 -0
pipeline.py +44 -0

config.json ADDED Viewed

	@@ -0,0 +1,132 @@

+{
+  "_name_or_path": "microsoft/layoutlmv2-base-uncased",
+  "architectures": [
+    "LayoutLMv2ForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "convert_sync_batchnorm": true,
+  "coordinate_size": 128,
+  "detectron2_config_args": {
+    "MODEL.ANCHOR_GENERATOR.SIZES": [
+      [
+        32
+      ],
+      [
+        64
+      ],
+      [
+        128
+      ],
+      [
+        256
+      ],
+      [
+        512
+      ]
+    ],
+    "MODEL.BACKBONE.NAME": "build_resnet_fpn_backbone",
+    "MODEL.FPN.IN_FEATURES": [
+      "res2",
+      "res3",
+      "res4",
+      "res5"
+    ],
+    "MODEL.MASK_ON": true,
+    "MODEL.PIXEL_STD": [
+      57.375,
+      57.12,
+      58.395
+    ],
+    "MODEL.POST_NMS_TOPK_TEST": 1000,
+    "MODEL.RESNETS.ASPECT_RATIOS": [
+      [
+        0.5,
+        1.0,
+        2.0
+      ]
+    ],
+    "MODEL.RESNETS.DEPTH": 101,
+    "MODEL.RESNETS.NUM_GROUPS": 32,
+    "MODEL.RESNETS.OUT_FEATURES": [
+      "res2",
+      "res3",
+      "res4",
+      "res5"
+    ],
+    "MODEL.RESNETS.SIZES": [
+      [
+        32
+      ],
+      [
+        64
+      ],
+      [
+        128
+      ],
+      [
+        256
+      ],
+      [
+        512
+      ]
+    ],
+    "MODEL.RESNETS.STRIDE_IN_1X1": false,
+    "MODEL.RESNETS.WIDTH_PER_GROUP": 8,
+    "MODEL.ROI_BOX_HEAD.NAME": "FastRCNNConvFCHead",
+    "MODEL.ROI_BOX_HEAD.NUM_FC": 2,
+    "MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION": 14,
+    "MODEL.ROI_HEADS.IN_FEATURES": [
+      "p2",
+      "p3",
+      "p4",
+      "p5"
+    ],
+    "MODEL.ROI_HEADS.NAME": "StandardROIHeads",
+    "MODEL.ROI_HEADS.NUM_CLASSES": 5,
+    "MODEL.ROI_MASK_HEAD.NAME": "MaskRCNNConvUpsampleHead",
+    "MODEL.ROI_MASK_HEAD.NUM_CONV": 4,
+    "MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION": 7,
+    "MODEL.RPN.IN_FEATURES": [
+      "p2",
+      "p3",
+      "p4",
+      "p5",
+      "p6"
+    ],
+    "MODEL.RPN.POST_NMS_TOPK_TRAIN": 1000,
+    "MODEL.RPN.PRE_NMS_TOPK_TEST": 1000,
+    "MODEL.RPN.PRE_NMS_TOPK_TRAIN": 2000
+  },
+  "fast_qkv": true,
+  "gradient_checkpointing": false,
+  "has_relative_attention_bias": true,
+  "has_spatial_attention_bias": true,
+  "has_visual_segment_embedding": true,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "image_feature_pool_shape": [
+    7,
+    7,
+    256
+  ],
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_2d_position_embeddings": 1024,
+  "max_position_embeddings": 512,
+  "max_rel_2d_pos": 256,
+  "max_rel_pos": 128,
+  "model_type": "layoutlmv2",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "rel_2d_pos_bins": 64,
+  "rel_pos_bins": 32,
+  "shape_size": 128,
+  "torch_dtype": "float32",
+  "transformers_version": "4.35.2",
+  "type_vocab_size": 2,
+  "vocab_size": 30522
+}

handler.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from typing import Dict, Any
+from transformers import pipeline
+import holidays
+import PIL.Image
+import io
+import pytesseract
+class PreTrainedPipeline():
+    def __init__(self, model_path="PrimWong/layout_qa_hparam_tuning"):
+        # Initializing the document-question-answering pipeline with the specified model
+        self.pipeline = pipeline("document-question-answering", model=model_path)
+        self.holidays = holidays.US()
+    def __call__(self, data: Dict[str, Any]) -> str:
+        """
+        Process input data for document question answering with optional holiday checking.
+        Args:
+            data (Dict[str, Any]): Input data containing an 'inputs' field with 'image' and 'question',
+                                   and optionally a 'date' field.
+        Returns:
+            str: The answer to the question or a holiday message if applicable.
+        """
+        inputs = data.get('inputs', {})
+        date = data.get("date")
+        # Check if date is provided and if it's a holiday
+        if date and date in self.holidays:
+            return "Today is a holiday!"
+        # Process the image and question for document question answering
+        image_path = inputs.get("image")
+        question = inputs.get("question")
+        # Load and process an image
+        image = PIL.Image.open(image_path)
+        image_text = pytesseract.image_to_string(image)  # Use OCR to extract text
+        # Run prediction (Note: this now uses the extracted text, not the image directly)
+        prediction = self.pipeline(question=question, context=image_text)
+        return prediction["answer"]  # Adjust based on actual output format of the model
+# Note: This script assumes the use of pytesseract for OCR to process images. Ensure pytesseract is configured properly.

pipeline.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from typing import Dict, Any
+from transformers import pipeline
+import holidays
+import PIL.Image
+import io
+import pytesseract
+class PreTrainedPipeline():
+    def __init__(self, model_path="PrimWong/layout_qa_hparam_tuning"):
+        # Initializing the document-question-answering pipeline with the specified model
+        self.pipeline = pipeline("document-question-answering", model=model_path)
+        self.holidays = holidays.US()
+    def __call__(self, data: Dict[str, Any]) -> str:
+        """
+        Process input data for document question answering with optional holiday checking.
+        Args:
+            data (Dict[str, Any]): Input data containing an 'inputs' field with 'image' and 'question',
+                                   and optionally a 'date' field.
+        Returns:
+            str: The answer to the question or a holiday message if applicable.
+        """
+        inputs = data.get('inputs', {})
+        date = data.get("date")
+        # Check if date is provided and if it's a holiday
+        if date and date in self.holidays:
+            return "Today is a holiday!"
+        # Process the image and question for document question answering
+        image_path = inputs.get("image")
+        question = inputs.get("question")
+        # Load and process an image
+        image = PIL.Image.open(image_path)
+        image_text = pytesseract.image_to_string(image)  # Use OCR to extract text
+        # Run prediction (Note: this now uses the extracted text, not the image directly)
+        prediction = self.pipeline(question=question, context=image_text)
+        return prediction["answer"]  # Adjust based on actual output format of the model
+# Note: This script assumes the use of pytesseract for OCR to process images. Ensure pytesseract is configured properly.