Omnibus committed on
Commit
1323bb3
·
0 Parent(s):

Duplicate from Omnibus/detect-language

Browse files
Files changed (4) hide show
  1. .gitattributes +35 -0
  2. README.md +13 -0
  3. app.py +117 -0
  4. requirements.txt +1 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Detect Language
3
+ emoji: 🌍
4
+ colorFrom: red
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 3.42.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: Omnibus/detect-language
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import easyocr
3
+ import numpy as np
4
+ from PIL import Image
5
+
6
+
7
+
8
# Display name -> language code handed to easyocr.Reader.
# Insertion order is significant: detect_lang walks the table top to bottom.
ocr_id = dict(
    (
        ("Afrikaans", "af"),
        ("Albanian", "sq"),
        ("Arabic", "ar"),
        ("Azerbaijani", "az"),
        ("Belarusian", "be"),
        ("Bulgarian", "bg"),
        ("Bengali", "bn"),
        ("Bosnian", "bs"),
        ("Chinese (simplified)", "ch_sim"),
        ("Chinese (traditional)", "ch_tra"),
        ("Croatian", "hr"),
        ("Czech", "cs"),
        ("Danish", "da"),
        ("Dutch", "nl"),
        ("English", "en"),
        ("Estonian", "et"),
        ("French", "fr"),
        ("German", "de"),
        ("Irish", "ga"),
        ("Hindi", "hi"),
        ("Hungarian", "hu"),
        ("Indonesian", "id"),
        ("Icelandic", "is"),
        ("Italian", "it"),
        ("Japanese", "ja"),
        ("Kannada", "kn"),
        ("Korean", "ko"),
        ("Lithuanian", "lt"),
        ("Latvian", "lv"),
        ("Mongolian", "mn"),
        ("Marathi", "mr"),
        ("Malay", "ms"),
        ("Nepali", "ne"),
        ("Norwegian", "no"),
        ("Occitan", "oc"),
        ("Polish", "pl"),
        ("Portuguese", "pt"),
        ("Romanian", "ro"),
        ("Russian", "ru"),
        ("Serbian (cyrillic)", "rs_cyrillic"),
        ("Serbian (latin)", "rs_latin"),
        ("Slovak", "sk"),
        ("Slovenian", "sl"),
        ("Spanish", "es"),
        ("Swedish", "sv"),
        ("Swahili", "sw"),
        ("Tamil", "ta"),
        ("Thai", "th"),
        ("Tagalog", "tl"),
        ("Turkish", "tr"),
        ("Ukrainian", "uk"),
        ("Urdu", "ur"),
        ("Uzbek", "uz"),
        ("Vietnamese", "vi"),
        ("Welsh", "cy"),
        ("Zulu", "zu"),
    )
)
66
+
67
# EasyOCR reports confidence on a 0-1 scale; once some language scores this
# high the search stops instead of loading every remaining model.
_EARLY_EXIT_CONFIDENCE = 0.75


def detect_lang(img, conf, blength=200):
    """Guess the language of the text in an image by trying each OCR model.

    Every language in ``ocr_id`` is tried in table order with its own
    ``easyocr.Reader``. The confidence of the first text region that reader
    returns is used as that language's score, and the best-scoring language
    wins.

    Args:
        img: Path (or file object) of the image, as accepted by
            ``PIL.Image.open``. ``None`` means no image was supplied.
        conf: Minimum confidence (0-1) the best language must reach to be
            reported as a detection.
        blength: Maximum number of text regions allowed in one OCR pass.
            If a pass returns more, detection is aborted. ``None`` disables
            the limit.

    Returns:
        A human-readable string: ``"<language> = Confidence: <score>"`` on
        success, otherwise a short message explaining why nothing was
        detected.
    """
    if img is None:
        return "No image provided"

    # Load once and release the file handle; convert so palette/alpha images
    # reach the OCR engine as ordinary RGB pixels.
    with Image.open(img) as pil_img:
        pixels = np.array(pil_img.convert("RGB"))

    best_lang = None
    best_conf = 0

    for name, code in ocr_id.items():
        try:
            reader = easyocr.Reader([code])
            bounds = reader.readtext(pixels)
        except Exception as exc:
            # Best effort: a model that cannot be loaded or run must not
            # abort the scan of the remaining languages.
            print(f"Skipping {name}: {exc}")
            continue

        if blength is not None and len(bounds) > blength:
            return f"Max Bounds Exceed, bounds={len(bounds)}"
        if not bounds:
            # This model found no text at all; nothing to score.
            continue

        # Each result is (bounding box, text, confidence).
        text, score = bounds[0][1], bounds[0][2]
        print(f'{text} = {score}')

        if score > best_conf:
            best_lang, best_conf = name, score
        if best_conf >= _EARLY_EXIT_CONFIDENCE:
            break

    if best_lang is None or best_conf < conf:
        return "No language detected"
    return f'{best_lang} = Confidence: {best_conf}'
107
+
108
# Gradio UI: an image upload, two tuning controls, a button, and a Markdown
# area that shows whatever string detect_lang returns.
with gr.Blocks() as app:
    # type="filepath" makes Gradio hand the callback a path, not pixel data.
    im = gr.Image(type="filepath")
    ocr_sens = gr.Slider(0.1, 1, step=0.05, value=0.25, label="Detect Min Confidence")
    max_tok = gr.Number(label="Max Tokens", step=1, value=200)
    det_btn = gr.Button()
    det_out = gr.Markdown("""""")

    # Inputs map positionally onto detect_lang(img, conf, blength); max_tok
    # was previously left out, so the "Max Tokens" control had no effect.
    det_btn.click(detect_lang, [im, ocr_sens, max_tok], det_out)
app.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ easyocr