Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from azure.cognitiveservices.vision.computervision import ComputerVisionClient | |
| from msrest.authentication import CognitiveServicesCredentials | |
| from azure.storage.blob import BlobClient | |
| #import utils functions | |
| from preprocessing_images import preprocessing_function | |
| from extract_text import azure_ocr | |
# --- Azure configuration -------------------------------------------------
# All settings are read from environment variables; a variable that is not
# set yields None here.
connection_string = os.environ.get("AZURE_CON_STRING")
my_container = os.environ.get("AZURE_CONTAINER")
endpoint = os.environ.get("AZURE_ENDPOINT")
subscription_key = os.environ.get("SUB_KEY")

# One Computer Vision client is created at import time and reused by every
# OCR request.
computervision_client = ComputerVisionClient(
    endpoint,
    CognitiveServicesCredentials(subscription_key),
)
def ocr_pdf(pdf_url):
    """Run Azure OCR for the PDF referenced by ``pdf_url`` and return its text.

    The function:

    1. validates the link and derives a blob name from the last segment of
       its path (query string and fragment are ignored);
    2. calls ``preprocessing_function`` with the link;
    3. uploads the local file ``answer_paper.pdf`` to the configured
       container under that blob name, overwriting any existing blob;
    4. calls ``azure_ocr`` with the blob URL and the shared client.

    Args:
        pdf_url: Link to the PDF, as entered in the UI textbox.

    Returns:
        The text returned by ``azure_ocr`` with surrounding whitespace
        stripped.

    Raises:
        ValueError: If ``pdf_url`` is empty or its path has no file name
            (for example, it ends with ``/``).
    """
    # Imported locally so this function does not rely on file-level imports.
    import posixpath
    from urllib.parse import urlsplit

    # Textbox input may be empty or carry stray whitespace/newlines.
    pdf_url = (pdf_url or "").strip()
    if not pdf_url:
        raise ValueError("A PDF link is required.")

    # Use only the path component so "file.pdf?sig=..." or "file.pdf#page=2"
    # does not leak the query string / fragment into the blob name.
    my_blob = posixpath.basename(urlsplit(pdf_url).path)
    if not my_blob:
        raise ValueError(
            f"Could not derive a file name from the PDF link: {pdf_url!r}"
        )

    preprocessing_function(pdf_url)

    blob = BlobClient.from_connection_string(
        conn_str=connection_string,
        container_name=my_container,
        blob_name=my_blob,
    )
    # NOTE(review): "answer_paper.pdf" is presumably the file written by
    # preprocessing_function -- confirm against preprocessing_images.
    with open("answer_paper.pdf", "rb") as data:
        blob.upload_blob(data, overwrite=True)

    text = azure_ocr(blob.url, computervision_client)
    return text.strip()
# --- Gradio UI -----------------------------------------------------------
# One textbox in (the PDF link), one textbox out (the OCR result).
#
# The gr.inputs / gr.outputs namespaces are deprecated in Gradio 3 and
# removed in Gradio 4, so prefer the top-level component and only fall back
# to the legacy namespaces on Gradio versions that lack it.
if hasattr(gr, "Textbox"):
    inputs = gr.Textbox(label="pdf link")
    outputs = gr.Textbox(label="OCR Text")
else:
    inputs = gr.inputs.Textbox(label="pdf link")
    outputs = gr.outputs.Textbox(label="OCR Text")

demo = gr.Interface(fn=ocr_pdf, inputs=inputs, outputs=outputs, title="Azure OCR")

# Launched unconditionally, as in the original script.
demo.launch()