Spaces:
Build error
Build error
Commit
·
5d57f7a
1
Parent(s):
4b491a9
Modified graphics of the UI
Browse files
app.py
CHANGED
|
@@ -205,17 +205,6 @@ def extractor_clean(text, k_words, transformer, question, total_kwords, return_t
|
|
| 205 |
|
| 206 |
|
| 207 |
|
| 208 |
-
def format_output(extracted_values):
|
| 209 |
-
output = f"Valori: {extracted_values[0][0]}\n"
|
| 210 |
-
output += f"Totale: {extracted_values[0][1]}\n"
|
| 211 |
-
if extracted_values[1] == True:
|
| 212 |
-
output += "-------------------\n"
|
| 213 |
-
output += f"Rif. Testo:\n{extracted_values[2]}"
|
| 214 |
-
return output
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
def pdf_ocr(file):
|
| 220 |
# Convert PDF to image
|
| 221 |
with tempfile.TemporaryDirectory() as path:
|
|
@@ -243,22 +232,25 @@ def pdf_ocr(file):
|
|
| 243 |
# Call extractor_clean and format_output functions
|
| 244 |
ks = ('mq', 'metri quadri', 'm2')
|
| 245 |
tra = 'it5/it5-base-question-answering'
|
| 246 |
-
quest = "Quanti metri quadri misura
|
| 247 |
totalK = ['totale', 'complessivo', 'complessiva']
|
| 248 |
|
| 249 |
extracted_values = extractor_clean(text=text, k_words=ks, transformer=tra, question=quest, total_kwords=totalK, return_text=True)
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
|
|
|
|
| 254 |
|
| 255 |
def ocr_interface(pdf_file):
|
| 256 |
# Call the pdf_ocr function
|
| 257 |
-
|
| 258 |
-
return
|
| 259 |
|
| 260 |
|
| 261 |
pdf_input = gr.inputs.File(label="PDF File")
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
|
| 207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
def pdf_ocr(file):
|
| 209 |
# Convert PDF to image
|
| 210 |
with tempfile.TemporaryDirectory() as path:
|
|
|
|
| 232 |
# Call extractor_clean and format_output functions
|
| 233 |
ks = ('mq', 'metri quadri', 'm2')
|
| 234 |
tra = 'it5/it5-base-question-answering'
|
| 235 |
+
quest = "Quanti metri quadri misura la superficie?"
|
| 236 |
totalK = ['totale', 'complessivo', 'complessiva']
|
| 237 |
|
| 238 |
extracted_values = extractor_clean(text=text, k_words=ks, transformer=tra, question=quest, total_kwords=totalK, return_text=True)
|
| 239 |
+
values_output = extracted_values[0][0] # Join values with '\n'
|
| 240 |
+
total_output = extracted_values[0][1]
|
| 241 |
+
text_output = extracted_values[2]
|
| 242 |
|
| 243 |
+
return values_output, total_output, text_output
|
| 244 |
|
| 245 |
def ocr_interface(pdf_file):
|
| 246 |
# Call the pdf_ocr function
|
| 247 |
+
values, total, text = pdf_ocr(pdf_file.name)
|
| 248 |
+
return values, total, text
|
| 249 |
|
| 250 |
|
| 251 |
pdf_input = gr.inputs.File(label="PDF File")
|
| 252 |
+
values_output = gr.outputs.Textbox(label="Mq. Values")
|
| 253 |
+
total_output = gr.outputs.Textbox(label="Total")
|
| 254 |
+
text_output = gr.outputs.Textbox(label="Ref. Text")
|
| 255 |
+
iface = gr.Interface(fn=ocr_interface, inputs=pdf_input, title="PDF MQ EXTRACTOR", outputs=[values_output, total_output, text_output], preprocess=format_output)
|
| 256 |
+
iface.launch()
|