Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,141 @@
|
|
| 1 |
-
import
|
|
|
|
|
|
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import pytesseract
|
| 4 |
|
| 5 |
+
def tesseract_ocr(image, language):
    """Run Tesseract on ``image`` while a Streamlit spinner is displayed.

    Args:
        image: The image to recognise; forwarded unchanged to
            ``pytesseract.image_to_string``.
        language: Value forwarded as the ``lang`` argument of
            ``pytesseract.image_to_string``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for ``image``.
    """
    # The spinner stays visible for exactly as long as the OCR call runs.
    with st.spinner("Performing OCR..."):
        return pytesseract.image_to_string(image, lang=language)
|
| 9 |
|
| 10 |
+
# --- Page header -----------------------------------------------------------
st.title("AIconvert OCR")

# Inject a CSS rule that colours and centres every <h1> (the title above).
st.markdown(
    '<style>h1{color: crimson; text-align: center;}</style>',
    unsafe_allow_html=True,
)

# CSS that hides the elements matched by `#MainMenu` and `footer`.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# --- Input -----------------------------------------------------------------
# `image` is None until the user uploads a file with one of these extensions.
image = st.file_uploader("Upload Image", type=["jpg", "png", "jpeg"])
|
| 20 |
+
# Display name -> language code handed to Tesseract as `lang`.
# Insertion order is significant: the language selectbox shows the keys in
# this order and defaults to the first one ("English").
# NOTE(review): these look like ISO 639-2 style codes; some of them may not
# have Tesseract trained data installed (or available at all) — verify
# against the deployment's installed language packs.
languages = {
    "English": "eng",
    "Arabic": "ara",
    "Afrikaans": "afr",
    "Albanian": "sqi",
    "Amharic": "amh",
    "Armenian": "hye",
    "Azerbaijani": "aze",
    "Basque": "eus",
    "Belarusian": "bel",
    "Bengali": "ben",
    "Bosnian": "bos",
    "Bulgarian": "bul",
    "Catalan": "cat",
    "Cebuano": "ceb",
    "Chichewa": "nya",
    "Chinese (Simplified)": "chi_sim",
    "Chinese (Traditional)": "chi_tra",
    "Corsican": "cos",
    "Croatian": "hrv",
    "Czech": "ces",
    "Danish": "dan",
    "Dutch": "nld",
    "Esperanto": "epo",
    "Estonian": "est",
    "Filipino": "fil",
    "Finnish": "fin",
    "French": "fra",
    "Frisian": "fry",
    "Galician": "glg",
    "Georgian": "kat",
    "German": "deu",
    "Greek": "ell",
    "Gujarati": "guj",
    "Haitian Creole": "hat",
    "Hausa": "hau",
    "Hawaiian": "haw",
    "Hebrew": "heb",
    "Hindi": "hin",
    "Hmong": "hmn",
    "Hungarian": "hun",
    "Icelandic": "isl",
    "Igbo": "ibo",
    "Indonesian": "ind",
    "Irish": "gle",
    "Italian": "ita",
    "Japanese": "jpn",
    "Javanese": "jav",
    "Kannada": "kan",
    "Kazakh": "kaz",
    "Khmer": "khm",
    "Kinyarwanda": "kin",
    "Korean": "kor",
    "Kurdish (Kurmanji)": "kur",
    "Kyrgyz": "kir",
    "Lao": "lao",
    "Latin": "lat",
    "Latvian": "lav",
    "Lithuanian": "lit",
    "Luxembourgish": "ltz",
    "Macedonian": "mkd",
    "Malagasy": "mlg",
    "Malay": "msa",
    "Malayalam": "mal",
    "Maltese": "mlt",
    "Maori": "mri",
    "Marathi": "mar",
    "Mongolian": "mon",
    "Myanmar (Burmese)": "mya",
    "Nepali": "nep",
    "Norwegian": "nor",
    "Odia (Oriya)": "ori",
    "Pashto": "pus",
    "Persian": "fas",
    "Polish": "pol",
    "Portuguese": "por",
    "Punjabi": "pan",
    "Romanian": "ron",
    "Russian": "rus",
    "Samoan": "smo",
    "Scots Gaelic": "gla",
    "Serbian": "srp",
    "Sesotho": "sot",
    "Shona": "sna",
    "Sindhi": "snd",
    "Sinhala": "sin",
    "Slovak": "slk",
    "Slovenian": "slv",
    "Somali": "som",
    "Spanish": "spa",
    "Sundanese": "sun",
    "Swahili": "swa",
    "Swedish": "swe",
    "Tajik": "tgk",
    "Tamil": "tam",
    "Tatar": "tat",
    "Telugu": "tel",
    "Thai": "tha",
    "Turkish": "tur",
    "Turkmen": "tuk",
    "Ukrainian": "ukr",
    "Urdu": "urd",
    "Uyghur": "uig",
    "Uzbek": "uzb",
    "Vietnamese": "vie",
    "Welsh": "cym",
    "Xhosa": "xho",
    "Yiddish": "yid",
    "Yoruba": "yor",
    "Zulu": "zul",
}
|
| 131 |
+
|
| 132 |
+
# Language picker; the first key of `languages` is pre-selected (index=0).
selected_language = st.selectbox("Select Language", list(languages), index=0)

# Run OCR only when the user presses the button; Streamlit re-executes the
# whole script on every interaction, so this branch is the "on click" handler.
if st.button("Submit"):
    if image is None:
        st.warning("Please upload an image.")
    else:
        try:
            # Keep the uploaded file (`image`) and the decoded picture under
            # separate names instead of rebinding `image`.
            picture = Image.open(image)
            result = tesseract_ocr(picture, languages[selected_language])
        except (OSError, pytesseract.TesseractError) as exc:
            # OSError: the upload could not be opened/decoded as an image, or
            # the tesseract executable is missing.
            # TesseractError: tesseract itself failed, e.g. no trained data
            # for the chosen language code.
            # Show a readable message rather than a raw traceback.
            st.error(f"OCR failed: {exc}")
        else:
            st.subheader("Output:")
            st.text(result)
|