ttemu committed on
Commit
26e50d6
·
verified ·
1 Parent(s): 60b744e

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ receipt_image1.png filter=lfs diff=lfs merge=lfs -text
37
+ receipt_image2.png filter=lfs diff=lfs merge=lfs -text
38
+ receipt_image3.png filter=lfs diff=lfs merge=lfs -text
39
+ receipt_image4.png filter=lfs diff=lfs merge=lfs -text
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Paligemma Invoice Json
3
- emoji: 📊
4
- colorFrom: gray
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 5.20.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: paligemma_invoice_json
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 5.20.0
 
 
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

import gradio as gr
import spaces
import torch
from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor

# Run on GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fine-tuned PaliGemma checkpoint for receipt -> JSON extraction.
model_id = "mychen76/paligemma-receipt-json-3b-mix-448-v2b"
# NOTE(review): bfloat16 is intended for GPU inference; it works on CPU but is slow — confirm.
dtype = torch.bfloat16
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, torch_dtype=dtype).to(device).eval()
processor = PaliGemmaProcessor.from_pretrained(model_id)

# Upper bound on the number of tokens generated per request.
MAX_TOKENS = 512
15
# Token-sequence -> JSON conversion, borrowed from Donut.
def token2json(tokens, is_inner_value=False, added_vocab=None):
    """
    Convert a (generated) token sequence into an ordered JSON format.

    The model emits Donut-style tags: ``<s_key>value</s_key>`` for fields,
    ``<sep/>`` between sibling values, and nested tags for sub-structures.
    """
    if added_vocab is None:
        added_vocab = processor.tokenizer.get_added_vocab()

    result = {}
    while tokens:
        opening = re.search(r"<s_(.*?)>", tokens, re.IGNORECASE)
        if opening is None:
            break
        field = opening.group(1)
        open_tag = opening.group()

        closing = re.search(rf"</s_{re.escape(field)}>", tokens, re.IGNORECASE)
        if closing is None:
            # Unterminated opening tag: drop it and keep scanning.
            tokens = tokens.replace(open_tag, "")
            continue
        close_tag = closing.group()

        body_match = re.search(
            re.escape(open_tag) + "(.*?)" + re.escape(close_tag),
            tokens,
            re.IGNORECASE | re.DOTALL,
        )
        if body_match is not None:
            body = body_match.group(1).strip()
            if "<s_" in body and "</s_" in body:
                # Non-leaf node: recurse into the nested structure.
                nested = token2json(body, is_inner_value=True, added_vocab=added_vocab)
                if nested:
                    result[field] = nested[0] if len(nested) == 1 else nested
            else:
                # Leaf node: one or more scalar values separated by <sep/>.
                values = []
                for part in body.split("<sep/>"):
                    part = part.strip()
                    if part in added_vocab and part[0] == "<" and part[-2:] == "/>":
                        part = part[1:-2]  # unwrap categorical special tokens
                    values.append(part)
                result[field] = values[0] if len(values) == 1 else values

        tokens = tokens[tokens.find(close_tag) + len(close_tag):].strip()
        if tokens[:6] == "<sep/>":
            # A sibling dict follows at this nesting level.
            return [result] + token2json(tokens[6:], is_inner_value=True, added_vocab=added_vocab)

    if result:
        return [result] if is_inner_value else result
    return [] if is_inner_value else {"text_sequence": tokens}
67
+
68
+
69
def modify_caption(caption: str) -> str:
    """
    Removes specific prefixes from captions.

    Args:
        caption (str): A string containing a caption.

    Returns:
        str: The caption with the prefix removed if it was present.
    """
    # Define the prefixes to remove (prefix, replacement)
    prefix_substrings = [
        ('EXTRACT_JSON_RECEIPT', '')
    ]

    # Create a regex pattern to match any of the prefixes
    pattern = '|'.join([re.escape(opening) for opening, _ in prefix_substrings])
    # BUG FIX: the regex matches with re.IGNORECASE, so the matched text may
    # differ in case from the dict key; key the lookup on lowercase to avoid
    # a KeyError on case-variant matches.
    replacers = {opening.lower(): replacer for opening, replacer in prefix_substrings}

    # Replace a matched prefix with its corresponding replacement
    def replace_fn(match):
        return replacers[match.group(0).lower()]

    # Apply the regex to the caption (first occurrence only)
    return re.sub(pattern, replace_fn, caption, count=1, flags=re.IGNORECASE)
94
+
95
def json_inference(image, input_text="EXTRACT_JSON_RECEIPT", device="cuda:0", max_new_tokens=512):
    """
    Run the PaliGemma model on a receipt image and decode the output into JSON.

    Args:
        image: Input receipt/invoice image.
        input_text: Task prompt fed to the model.
        device: Device string for the input tensors.
        max_new_tokens: Generation budget.

    Returns:
        Tuple of (decoded text, JSON produced by ``token2json``).
    """
    inputs = processor(text=input_text, images=image, return_tensors="pt").to(device)
    # Greedy decoding; for fancier strategies see https://huggingface.co/blog/how-to-generate
    generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # The generated sequence begins with the prompt (image tokens + text
    # prompt); count those tokens so they can be sliced off before decoding.
    image_token_index = model.config.image_token_index
    n_image_tokens = len(generated_ids[generated_ids == image_token_index])
    n_text_tokens = len(processor.tokenizer.encode(input_text))
    # +2 presumably accounts for BOS/separator tokens — TODO confirm against the processor.
    prompt_length = n_image_tokens + n_text_tokens + 2
    generated_text = processor.batch_decode(
        generated_ids[:, prompt_length:],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]

    # Convert the token sequence into JSON (method taken from Donut).
    generated_json = token2json(generated_text)
    return generated_text, generated_json
110
+
111
# enable space
# @spaces.GPU
def create_captions_rich(image):
    """Gradio callback: extract structured JSON from a receipt/invoice image."""
    torch.cuda.empty_cache()  # release cached GPU memory from the previous request
    # FIX: the prompt variable was previously assigned but the literal was
    # re-passed; use the variable so there is a single source of truth.
    prompt = "EXTRACT_JSON_RECEIPT"
    generated_text, generated_json = json_inference(
        image=image, input_text=prompt, device=device, max_new_tokens=MAX_TOKENS
    )
    return generated_json
118
+
119
+ css = """
120
+ #mkd {
121
+ height: 500px;
122
+ overflow: auto;
123
+ border: 1px solid #ccc;
124
+ }
125
+ """
126
+
127
+ with gr.Blocks(css=css) as demo:
128
+ gr.HTML("<h1><center>PaliGemma Receipt and Invoice Model<center><h1>")
129
+ with gr.Tab(label="Receipt or Invoices Image"):
130
+ with gr.Row():
131
+ with gr.Column():
132
+ input_img = gr.Image(label="Input Picture")
133
+ submit_btn = gr.Button(value="Submit")
134
+ output = gr.Text(label="Receipt Json")
135
+
136
+ gr.Examples([["receipt_image1.jpg"], ["receipt_image2.jpg"], ["receipt_image3.png"],["receipt_image4.png"]],
137
+ inputs = [input_img],
138
+ outputs = [output],
139
+ fn=create_captions_rich,
140
+ label='Try captioning on examples'
141
+ )
142
+
143
+ submit_btn.click(create_captions_rich, [input_img], [output])
144
+
145
+ demo.queue().launch(share=True,server_name="0.0.0.0",debug=True)
receipt_image1.png ADDED

Git LFS Details

  • SHA256: 79972167aa943cb851a4df83a207f511ac14864b6dc62f0676d4150d5cfbb171
  • Pointer size: 131 Bytes
  • Size of remote file: 826 kB
receipt_image2.png ADDED

Git LFS Details

  • SHA256: fc0adf2932dfecfdfa504f64c79e42e6526b25ceb86c05ddc9fdb220114ee5d9
  • Pointer size: 131 Bytes
  • Size of remote file: 435 kB
receipt_image3.png ADDED

Git LFS Details

  • SHA256: c7c67130fef0a29891702b72c44e63b684498c132f8588162711609ab404336f
  • Pointer size: 131 Bytes
  • Size of remote file: 484 kB
receipt_image4.png ADDED

Git LFS Details

  • SHA256: 4f90f0012474d5f8e9cf1a8df332216112669daacb4a6cfb17d633972a2b67cd
  • Pointer size: 131 Bytes
  • Size of remote file: 606 kB