File size: 1,189 Bytes
a077090
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
import gradio as gr
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials
from azure.storage.blob import BlobClient
#import utils functions
from preprocessing_images import preprocessing_function
from extract_text import azure_ocr
# Azure settings come from the environment; a variable that is not set
# yields None here rather than raising.
my_container = os.environ.get("AZURE_CONTAINER")
subscription_key = os.environ.get("SUB_KEY")
endpoint = os.environ.get("AZURE_ENDPOINT")
connection_string = os.environ.get("AZURE_CON_STRING")

# One Computer Vision client is built at import time and reused for every
# OCR call made by this module.
computervision_client = ComputerVisionClient(
    endpoint,
    CognitiveServicesCredentials(subscription_key),
)

def ocr_pdf(pdf_url):
    """Run Azure OCR on the PDF behind *pdf_url* and return the text.

    Steps, in order:

    1. Derive the blob name from the last path segment of the link.
    2. Call ``preprocessing_function(pdf_url)``.
    3. Upload the local file ``answer_paper.pdf`` to the configured
       container under that blob name, overwriting any existing blob.
    4. Pass the blob's URL to ``azure_ocr`` and return its result with
       leading/trailing whitespace removed.

    Args:
        pdf_url: Link to the PDF, as typed into the UI textbox.

    Returns:
        The stripped text returned by ``azure_ocr``.

    Raises:
        ValueError: If *pdf_url* is empty or its path has no file name.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlsplit

    if not pdf_url or not pdf_url.strip():
        raise ValueError("A PDF link is required.")
    pdf_url = pdf_url.strip()

    # Use only the URL *path* so a query string or fragment (for example a
    # SAS token) never leaks into the blob name, and fail early when the
    # link has no final path segment instead of uploading a nameless blob.
    blob_name = urlsplit(pdf_url).path.rsplit("/", 1)[-1]
    if not blob_name:
        raise ValueError(f"Cannot derive a file name from link: {pdf_url!r}")

    # NOTE(review): presumably this downloads/cleans the PDF and writes
    # "answer_paper.pdf" in the working directory - confirm in
    # preprocessing_images.py.
    preprocessing_function(pdf_url)

    blob = BlobClient.from_connection_string(
        conn_str=connection_string,
        container_name=my_container,
        blob_name=blob_name,
    )
    with open("answer_paper.pdf", "rb") as data:
        blob.upload_blob(data, overwrite=True)

    # The OCR helper is given the uploaded blob's URL, not the original link.
    text = azure_ocr(blob.url, computervision_client)
    return text.strip()
    
# Minimal UI: one textbox takes the PDF link, one textbox shows the OCR text.
# NOTE(review): gr.inputs / gr.outputs are legacy Gradio namespaces; newer
# releases expose gr.Textbox directly - check the pinned Gradio version
# before upgrading.
inputs = gr.inputs.Textbox(label="pdf link")
outputs = gr.outputs.Textbox(label="OCR Text")
demo = gr.Interface(
    fn=ocr_pdf,
    inputs=inputs,
    outputs=outputs,
    title="Azure OCR",
)
# Starts the web app as soon as this module is executed.
demo.launch()