Spaces:
Sleeping
Sleeping
| from preprocessing_images import preprocessing_function | |
| from datetime import datetime | |
| from azure.storage.blob import BlobClient | |
| from msrest.authentication import CognitiveServicesCredentials | |
| #importing azure packages | |
| from azure.cognitiveservices.vision.computervision import ComputerVisionClient | |
| from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes | |
| #ocr extraction using azure computer vision API | |
def azure_ocr(pdf_url, computervision_client, poll_interval=1.0):
    """Run the Computer Vision Read operation on a document and return its text.

    Args:
        pdf_url: URL of the document passed to ``computervision_client.read``.
        computervision_client: Client object exposing ``read`` and
            ``get_read_result`` (a ``ComputerVisionClient`` in this file).
        poll_interval: Seconds to wait between status polls while the
            operation is still ``notStarted`` or ``running``.

    Returns:
        The text of every recognised line, joined by single spaces. An empty
        string is returned when the operation ends in any status other than
        ``OperationStatusCodes.succeeded``.
        NOTE(review): the original indentation was lost, so the non-success
        return value is an assumption -- confirm against callers.

    Raises:
        Exception: Any failure is re-raised wrapped in a plain ``Exception``
            (chained to the original error so the root cause stays visible).
    """
    import time  # local import keeps this block self-contained

    try:
        read_response = computervision_client.read(pdf_url, raw=True)
        # The operation id is the last path segment of the Operation-Location header.
        read_operation_location = read_response.headers["Operation-Location"]
        operation_id = read_operation_location.split("/")[-1]

        while True:
            read_result = computervision_client.get_read_result(operation_id)
            if read_result.status not in ['notStarted', 'running']:
                break
            # Pause between polls instead of hammering the service in a tight loop.
            time.sleep(poll_interval)

        if read_result.status != OperationStatusCodes.succeeded:
            return ''

        words = [
            line.text
            for text_result in read_result.analyze_result.read_results
            for line in text_result.lines
        ]
        return ' '.join(words)
    except Exception as e:
        raise Exception(e) from e
def extract_text_from_url(test_pdf_url):
    """Preprocess a PDF, upload it to blob storage, OCR it and return the text.

    Steps, in order: ``preprocessing_function`` is called on the URL, the local
    file ``answer_paper.pdf`` is uploaded under a timestamped blob name, the
    blob URL is sent through ``azure_ocr``, and the lower-cased result is
    trimmed to what follows the first "150 word" marker if one is present.

    Args:
        test_pdf_url: URL of the source PDF, forwarded to
            ``preprocessing_function``.

    Returns:
        The lower-cased OCR text. When a marker is found, only the text
        starting 10 characters after the marker's start is returned.

    Raises:
        Exception: Any failure is re-raised wrapped in a plain ``Exception``
            (chained to the original error).
    """
    try:
        preprocessing_function(test_pdf_url)

        # Second-resolution timestamp used as the blob name suffix.
        my_blob = 'test_clean_pdf' + datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
        blob = BlobClient.from_connection_string(
            conn_str=connection_string,
            container_name=my_container,
            blob_name=my_blob,
        )
        # Presumably written by preprocessing_function -- TODO confirm.
        with open("answer_paper.pdf", "rb") as data:
            blob.upload_blob(data)

        computervision_client = ComputerVisionClient(
            endpoint, CognitiveServicesCredentials(subscription_key)
        )
        text = azure_ocr(blob.url, computervision_client)
        text = text.lower()

        # "150 ward" looks like a tolerated OCR misread of "150 word" -- confirm.
        for marker in ("150 word", "150 ward"):
            position = text.find(marker)
            # str.find returns -1 when absent; index 0 is a valid match, so
            # compare with >= 0 rather than > 0.
            if position >= 0:
                # The marker is 8 characters; the offset of 10 also drops the
                # next two (presumably "s" plus a separator -- confirm).
                text = text[position + 10:]
                break
        return text
    except Exception as e:
        raise Exception(e) from e