# azure_ocr2.0 / app.py
# themeetjani's picture
# Duplicate from ai-based/azure_ocr
# a077090
import os
import gradio as gr
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials
from azure.storage.blob import BlobClient
# Import the project-local utility functions.
from preprocessing_images import preprocessing_function
from extract_text import azure_ocr
# Deployment settings are read from the environment; a variable that is not
# set comes back as None.
connection_string = os.environ.get("AZURE_CON_STRING")
my_container = os.environ.get("AZURE_CONTAINER")
endpoint = os.environ.get("AZURE_ENDPOINT")
subscription_key = os.environ.get("SUB_KEY")

# A single Computer Vision client is built at import time and reused by
# ocr_pdf for every call.
computervision_client = ComputerVisionClient(
    endpoint,
    CognitiveServicesCredentials(subscription_key),
)
def _blob_name_from_url(pdf_url, default="answer_paper.pdf"):
    """Return the last path segment of *pdf_url* for use as a blob name.

    Only the path component of the URL is considered, so a query string or
    fragment never leaks into the blob name, and a trailing slash does not
    produce an empty name. *default* is returned when the URL has no path
    segment at all.
    """
    # Imported locally so this block does not depend on a new file-level import.
    from urllib.parse import urlsplit

    path = urlsplit(pdf_url).path
    return path.rstrip("/").rsplit("/", 1)[-1] or default


def ocr_pdf(pdf_url):
    """Run Azure OCR on the PDF behind *pdf_url* and return the text.

    Steps:
      1. Hand the URL to ``preprocessing_function``.
      2. Upload the local file ``answer_paper.pdf`` to the configured Azure
         Blob container, overwriting any blob of the same name.
      3. Pass the blob's URL and the shared Computer Vision client to
         ``azure_ocr``.

    Args:
        pdf_url: Link to the PDF, as typed into the Gradio text box.

    Returns:
        The text returned by ``azure_ocr`` with surrounding whitespace removed.
    """
    # NOTE(review): presumably this downloads/cleans the PDF and writes it to
    # "answer_paper.pdf", which is read back below -- confirm in
    # preprocessing_images.py.
    preprocessing_function(pdf_url)

    blob = BlobClient.from_connection_string(
        conn_str=connection_string,
        container_name=my_container,
        blob_name=_blob_name_from_url(pdf_url),
    )
    with open("answer_paper.pdf", "rb") as data:
        blob.upload_blob(data, overwrite=True)

    text = azure_ocr(blob.url, computervision_client)
    return text.strip()
# Build the web UI: one text box for the PDF link, one for the OCR result.
# The gr.inputs / gr.outputs namespaces are deprecated and are no longer
# available in newer Gradio releases, so prefer the top-level component and
# fall back to the old namespaces only when it does not exist.
if hasattr(gr, "Textbox"):
    inputs = gr.Textbox(label="pdf link")
    outputs = gr.Textbox(label="OCR Text")
else:
    inputs = gr.inputs.Textbox(label="pdf link")
    outputs = gr.outputs.Textbox(label="OCR Text")

demo = gr.Interface(
    fn=ocr_pdf,
    inputs=inputs,
    outputs=outputs,
    title="Azure OCR",
)

# Start the Gradio server as soon as this file is executed.
demo.launch()