Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,13 +7,6 @@ from azure.storage.blob import BlobClient
|
|
| 7 |
#import utils functions
|
| 8 |
from preprocessing_images import preprocessing_function
|
| 9 |
from extract_text import azure_ocr
|
| 10 |
-
|
| 11 |
-
from sentence_transformers import SentenceTransformer
|
| 12 |
-
!pip install -U sentence-transformers
|
| 13 |
-
from numpy.linalg import norm
|
| 14 |
-
import numpy as np
|
| 15 |
-
|
| 16 |
-
|
| 17 |
my_container = os.getenv("AZURE_CONTAINER")
|
| 18 |
subscription_key = os.getenv("SUB_KEY")
|
| 19 |
endpoint = os.getenv("AZURE_ENDPOINT")
|
|
@@ -21,23 +14,23 @@ connection_string = os.getenv("AZURE_CON_STRING")
|
|
| 21 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 22 |
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
|
| 23 |
|
| 24 |
-
def ocr_pdf(
|
| 25 |
-
preprocessing_function(
|
| 26 |
-
my_blob =
|
| 27 |
blob = BlobClient.from_connection_string(conn_str=connection_string, container_name= my_container, blob_name=my_blob)
|
| 28 |
with open("answer_paper.pdf", "rb") as data:
|
| 29 |
blob.upload_blob(data,overwrite=True)
|
| 30 |
text = azure_ocr(blob.url,computervision_client)
|
| 31 |
return text.strip()
|
| 32 |
|
| 33 |
-
def ocr_pdf(pdf_url2):
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
|
| 42 |
def classify_cause(incident_description):
|
| 43 |
response = openai.Completion.create(
|
|
@@ -65,42 +58,26 @@ def classify_class(incident_description):
|
|
| 65 |
classification = response.choices[0].text.strip()
|
| 66 |
return classification
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
embeddings = model.encode(sentences)
|
| 72 |
-
cosine = np.dot(embeddings[0],embeddings[1])/(norm(embeddings[0])*norm(embeddings[1]))
|
| 73 |
-
return cosine
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
def avatiation(pdf_url1,pdf_url2):
|
| 77 |
-
pdftext1 = ocr_pdf(pdf_url1)
|
| 78 |
-
pdftext2 = ocr_pdf(pdf_url2)
|
| 79 |
-
|
| 80 |
-
defect_class1 = classify_class(pdftext1)
|
| 81 |
-
main_issue1 = classify_cause(pdftext1)
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
return
|
| 87 |
|
| 88 |
|
| 89 |
|
| 90 |
inputs1 = gr.inputs.Textbox(label="Link for aviation log reports")
|
| 91 |
-
inputs2 = gr.inputs.Textbox(label="Link for aviation log reports 2")
|
| 92 |
|
| 93 |
|
| 94 |
outputs = [gr.outputs.Textbox(label="Main Issue of the log report"),
|
| 95 |
-
gr.outputs.Textbox(label="category of the log report")
|
| 96 |
-
|
| 97 |
-
gr.outputs.Textbox(label="Main Issue of the log report2"),
|
| 98 |
-
gr.outputs.Textbox(label="category of the log report2"),
|
| 99 |
-
gr.outputs.Textbox(label="log similarity")
|
| 100 |
-
|
| 101 |
]
|
| 102 |
|
| 103 |
|
| 104 |
-
demo = gr.Interface(fn=avatiation,inputs=
|
| 105 |
demo.launch()
|
| 106 |
|
|
|
|
| 7 |
#import utils functions
|
| 8 |
from preprocessing_images import preprocessing_function
|
| 9 |
from extract_text import azure_ocr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
my_container = os.getenv("AZURE_CONTAINER")
|
| 11 |
subscription_key = os.getenv("SUB_KEY")
|
| 12 |
endpoint = os.getenv("AZURE_ENDPOINT")
|
|
|
|
| 14 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 15 |
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
|
| 16 |
|
| 17 |
+
def ocr_pdf(pdf_url):
    """Upload the preprocessed PDF to Azure Blob Storage and OCR it.

    Steps, in order:

    1. ``preprocessing_function(pdf_url)`` is invoked.
    2. The local file ``answer_paper.pdf`` is uploaded (overwriting any
       existing blob) to the configured container, under a blob name taken
       from the last path segment of ``pdf_url``.
    3. ``azure_ocr`` is called with the blob's URL and the module-level
       Computer Vision client.

    Args:
        pdf_url: Link to the PDF report.

    Returns:
        The value returned by ``azure_ocr`` with surrounding whitespace
        stripped (assumes ``azure_ocr`` returns a ``str`` -- TODO confirm).

    Raises:
        ValueError: If no file name can be derived from ``pdf_url``.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    from urllib.parse import urlsplit

    preprocessing_function(pdf_url)

    # Take the name from the URL *path* only. A plain ``split('/')[-1]`` keeps
    # any query string or fragment (e.g. a SAS token) in the blob name and
    # yields an empty name when the URL ends with a slash.
    url_path = urlsplit(pdf_url).path
    my_blob = url_path.rstrip("/").rsplit("/", 1)[-1]
    if not my_blob:
        raise ValueError(f"Cannot derive a blob name from URL: {pdf_url!r}")

    blob = BlobClient.from_connection_string(
        conn_str=connection_string,
        container_name=my_container,
        blob_name=my_blob,
    )

    # NOTE(review): presumably ``preprocessing_function`` writes
    # ``answer_paper.pdf`` into the working directory -- confirm against
    # preprocessing_images.py.
    with open("answer_paper.pdf", "rb") as data:
        blob.upload_blob(data, overwrite=True)

    text = azure_ocr(blob.url, computervision_client)
    return text.strip()
|
| 25 |
|
| 26 |
+
# def ocr_pdf(pdf_url2):
|
| 27 |
+
# preprocessing_function(pdf_url2)
|
| 28 |
+
# my_blob = pdf_url2.split('/')[-1]
|
| 29 |
+
# blob = BlobClient.from_connection_string(conn_str=connection_string, container_name= my_container, blob_name=my_blob)
|
| 30 |
+
# with open("answer_paper.pdf", "rb") as data:
|
| 31 |
+
# blob.upload_blob(data,overwrite=True)
|
| 32 |
+
# text = azure_ocr(blob.url,computervision_client)
|
| 33 |
+
# return text.strip()
|
| 34 |
|
| 35 |
def classify_cause(incident_description):
|
| 36 |
response = openai.Completion.create(
|
|
|
|
| 58 |
classification = response.choices[0].text.strip()
|
| 59 |
return classification
|
| 60 |
|
| 61 |
+
|
| 62 |
+
def avatiation(pdf_url):
    """OCR the report behind ``pdf_url`` and classify the extracted text.

    Args:
        pdf_url: Link to the log report, passed straight to ``ocr_pdf``.

    Returns:
        A ``(main_issue, defect_class)`` tuple: the result of
        ``classify_cause`` followed by the result of ``classify_class``,
        both computed from the same OCR text.
    """
    report_text = ocr_pdf(pdf_url)

    # The category is requested before the cause; the return order is the
    # reverse (cause first) to match the order of the output widgets.
    category = classify_class(report_text)
    cause = classify_cause(report_text)
    return cause, category
|
| 69 |
|
| 70 |
|
| 71 |
|
| 72 |
# Gradio wiring: one text input (the report link) and two text outputs, in
# the same order as the tuple returned by ``avatiation``.
# NOTE(review): the ``gr.inputs`` / ``gr.outputs`` namespaces depend on the
# installed Gradio version -- confirm against the pinned requirement.
inputs1 = gr.inputs.Textbox(label="Link for aviation log reports")

outputs = [
    gr.outputs.Textbox(label="Main Issue of the log report"),
    gr.outputs.Textbox(label="category of the log report"),
]

demo = gr.Interface(
    fn=avatiation,
    inputs=inputs1,
    outputs=outputs,
    title="ATA Auto classification using OCR and GPT3 ",
)
demo.launch()
|
| 83 |
|