File size: 2,247 Bytes
a4f2b8a
 
 
 
 
 
 
a3ea002
a4f2b8a
66fa0b9
 
19f2f15
66fa0b9
a4f2b8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b962f12
a4f2b8a
 
 
 
 
 
 
 
 
 
 
 
 
 
c8d0eb9
a4f2b8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8d0eb9
a4f2b8a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio
import cv2
from sklearn.naive_bayes import BernoulliNB 
import pickle
import numpy as np

# multiclass_model =  pickle.load(open('models/MulticlassModel_200x200', 'rb'))

# Load the pickled ensemble once at import time. `predict` indexes it by
# category key and calls `predict_proba` on each entry (presumably one
# BernoulliNB per category -- confirm against the training code).
# NOTE(security): unpickling executes arbitrary code embedded in the file, so
# this file must only ever be a trusted artifact shipped with the app.
with open('EnsembleModels_200x200', 'rb') as model_file:
    ensemble_model = pickle.load(model_file)

# Sample images offered as one-click examples in the Gradio interface.
examples = [
    'images/test2.jpg',
    'images/test4.jpg',
    'images/test6.jpg',
    "images/Incom.jpg",
    "images/DLC.jpg",
    'images/EHD.jpg',
    'images/IDR.jpg',
    'images/PPD.jpg',
    'images/PSLF.jpg',
    'images/SCD.jpg',
    'images/TLF.jpg',
]

def preprocess(img):
    """Turn a colour image into a single flattened, binarised feature row.

    The image is resized to 200x200, converted to grayscale, binarised with a
    Gaussian adaptive threshold (block size 11, constant 2) and flattened.

    Args:
        img: 3-channel image array accepted by ``cv2.resize``/``cv2.cvtColor``.

    Returns:
        Array of shape (1, 40000) whose entries are 0.0 or 1.0.
    """
    side = 200
    resized = cv2.resize(img, (side, side))
    # NOTE(review): the conversion assumes BGR channel order, while Gradio's
    # Image component hands over RGB arrays by default -- confirm this matches
    # how the training images were converted before changing it.
    gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
    # Thresholded pixels are 0 or 255; dividing by 255 yields 0/1 features.
    return np.reshape(binary, (1, side * side)) / 255

def predict(img):
    """Score an image with every per-category model in the ensemble.

    Args:
        img: Raw image array; it is passed through ``preprocess`` first.

    Returns:
        1-D float array of length 6. Entry ``i`` holds the first
        ``predict_proba`` column (the probability of the model's first class)
        from the model whose key maps to slot ``i``. Slot order lines up with
        the label order used in ``generate_results``.
    """
    features = preprocess(img)

    # Ensemble key -> slot of that model's score in the returned array.
    categories = {
        "Inco": 2,
        "Teac": 1,
        "Cons": 0,
        "Publ": 4,
        "Econ": 3,
        "Reaf": 5,
    }

    proba = np.zeros(len(categories))
    for key, slot in categories.items():
        # `features` is a single row, so predict_proba returns one row too.
        # Index the scalar explicitly: assigning a shape-(1,) array into a
        # scalar slot is deprecated since NumPy 1.25.
        # Presumably column 0 is the "not this category" class, which is why
        # the caller picks the minimum -- confirm against the training labels.
        proba[slot] = ensemble_model[key].predict_proba(features)[0, 0]

    return proba

def generate_results(proba):
    """Convert per-category scores into a one-hot label dictionary.

    The category with the strictly smallest score is selected. If the minimum
    is shared by several categories, or cannot be determined because the
    scores contain NaN, "UNKNOWN" is selected instead.

    Args:
        proba: 1-D sequence of six scores, ordered like the first six labels.

    Returns:
        Dict mapping each label to 0, except the selected label which maps
        to 1. Keys are in label order with "UNKNOWN" last.
    """
    # NOTE(review): "PLSF" looks like a typo for "PSLF" (see the example image
    # 'images/PSLF.jpg'); kept unchanged because it is a user-visible key.
    categories = [
        "DLC",
        "TLF",
        "IDR",
        "EHD",
        "PLSF",
        "REA",
        "UNKNOWN",
    ]
    unknown_index = len(categories) - 1
    scores = [0] * len(categories)

    proba = np.asarray(proba)
    # Indices of every entry equal to the minimum; NaN never compares equal,
    # so NaN scores yield no winner at all.
    winners = np.flatnonzero(proba == np.amin(proba))

    # Exactly one winner is required. Pull the index out as a plain int rather
    # than calling int() on an array, which NumPy deprecates for ndim > 0.
    choice = int(winners[0]) if len(winners) == 1 else unknown_index
    scores[choice] = 1

    return dict(zip(categories, scores))

def inference(img):
    """Classify an image end to end; this is the Gradio callback.

    Args:
        img: Image array supplied by the interface.

    Returns:
        The label-to-score dict produced by ``generate_results`` from the
        scores computed by ``predict``.
    """
    return generate_results(predict(img))

# Web UI: an image input wired to `inference`, with the result shown as a label.
demo = gradio.Interface(
    # Page text.
    title='Document Classification',
    description='Loan Document Classification Using A Naive Bayes Classifier Ensemble',
    article='The purpose of this demo was to provide a simple baseline for the classification of document images. View the complete write up here https://github.com/PatrickTyBrown/document_classification/blob/main/project_writeup.pdf\n\n\nLinkedin: https://www.linkedin.com/in/patrick-ty-brown/\nGithub: https://github.com/PatrickTyBrown/document_classification\nPortfolio: https://sites.google.com/view/patrick-brown/home',
    # Callback and its input/output components.
    fn=inference,
    inputs=gradio.Image(),
    outputs=gradio.Label(),
    # Clickable sample images.
    examples=examples,
)

# Start the server as soon as the module is executed.
demo.launch()