"""Gradio front end that uploads a PDF to Azure Blob Storage and runs OCR on it."""

import os
import posixpath
from urllib.parse import urlsplit

import gradio as gr
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials
from azure.storage.blob import BlobClient

# Project-local helper functions.
from preprocessing_images import preprocessing_function
from extract_text import azure_ocr

# Azure settings come from the environment; os.getenv returns None for any
# variable that is not set.
my_container = os.getenv("AZURE_CONTAINER")
subscription_key = os.getenv("SUB_KEY")
endpoint = os.getenv("AZURE_ENDPOINT")
connection_string = os.getenv("AZURE_CON_STRING")

# A single Computer Vision client is created at import time and shared by
# every call to ocr_pdf.
computervision_client = ComputerVisionClient(
    endpoint, CognitiveServicesCredentials(subscription_key)
)


def _blob_name_from_url(pdf_url):
    """Return the last path segment of *pdf_url*, to be used as the blob name.

    Only the path component of the URL is inspected, so a query string
    (for example a SAS token, which may itself contain ``/``) or a fragment
    never leaks into the name, and a trailing slash is ignored.

    Raises:
        ValueError: if *pdf_url* is empty or its path has no final segment.
    """
    url_path = urlsplit((pdf_url or "").strip()).path.rstrip("/")
    name = posixpath.basename(url_path)
    if not name:
        raise ValueError(f"Cannot derive a file name from URL: {pdf_url!r}")
    return name


def ocr_pdf(pdf_url):
    """Run OCR on the PDF behind *pdf_url* and return the recognised text.

    Steps, in order:
      1. Derive the blob name from the URL path (fails fast on a bad URL).
      2. Call ``preprocessing_function(pdf_url)``.
      3. Upload the local file ``answer_paper.pdf`` to the configured
         container under that blob name, overwriting any existing blob.
      4. Pass the blob's URL to ``azure_ocr`` and return its result with
         leading and trailing whitespace removed.

    Args:
        pdf_url: Link to the PDF, as typed into the Gradio text box.

    Returns:
        The stripped text returned by ``azure_ocr``.

    Raises:
        ValueError: if no blob name can be derived from *pdf_url*.
    """
    blob_name = _blob_name_from_url(pdf_url)

    # NOTE(review): presumably this writes the processed document to
    # "answer_paper.pdf", which is read back below -- confirm against
    # preprocessing_images.
    preprocessing_function(pdf_url)

    blob = BlobClient.from_connection_string(
        conn_str=connection_string,
        container_name=my_container,
        blob_name=blob_name,
    )
    with open("answer_paper.pdf", "rb") as data:
        blob.upload_blob(data, overwrite=True)

    text = azure_ocr(blob.url, computervision_client)
    return text.strip()


# NOTE(review): the gr.inputs / gr.outputs namespaces belong to the older
# Gradio API; confirm the pinned Gradio version still provides them before
# upgrading.
inputs = gr.inputs.Textbox(label="pdf link")
outputs = gr.outputs.Textbox(label="OCR Text")

demo = gr.Interface(fn=ocr_pdf, inputs=inputs, outputs=outputs, title="Azure OCR")

# Launched unconditionally at module level, exactly as the original script did.
demo.launch()