import streamlit as st
from PIL import Image
import pytesseract
def tesseract_ocr(image, language):
    """Run Tesseract OCR on *image* while a Streamlit spinner is displayed.

    Args:
        image: Image forwarded unchanged to ``pytesseract.image_to_string``
            (this script passes a PIL image).
        language: Tesseract language code forwarded as the ``lang`` argument.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the image.
    """
    busy_indicator = st.spinner("Performing OCR...")
    # Returning from inside the context still exits the spinner afterwards.
    with busy_indicator:
        return pytesseract.image_to_string(image, lang=language)
# --- Page header -----------------------------------------------------------
st.title(body="AIconvert OCR")

# An empty HTML snippet is rendered here; raw HTML is enabled for the call.
st.markdown(body='', unsafe_allow_html=True)

# The style payload is currently just a newline, so no CSS is injected.
# NOTE(review): presumably the original <style> markup was lost - confirm.
hide_streamlit_style = "\n"
st.markdown(body=hide_streamlit_style, unsafe_allow_html=True)

# --- Input widget ----------------------------------------------------------
# `image` holds the uploaded file, or None when nothing has been uploaded;
# the Submit handler further down checks for that None.
accepted_extensions = ["jpg", "png", "jpeg"]
image = st.file_uploader(label="Upload Image", type=accepted_extensions)
# Ordered (display name, Tesseract language code) pairs.  The order is the
# order shown in the language drop-down, and the first pair ("English") is the
# entry selected by default there.
# NOTE(review): each code is passed verbatim as Tesseract's `lang` value;
# presumably not every code here has a matching traineddata file in a stock
# Tesseract install - verify against the deployed language packs.
_LANGUAGE_PAIRS = (
    ("English", "eng"),
    ("Arabic", "ara"),
    ("Afrikaans", "afr"),
    ("Albanian", "sqi"),
    ("Amharic", "amh"),
    ("Armenian", "hye"),
    ("Azerbaijani", "aze"),
    ("Basque", "eus"),
    ("Belarusian", "bel"),
    ("Bengali", "ben"),
    ("Bosnian", "bos"),
    ("Bulgarian", "bul"),
    ("Catalan", "cat"),
    ("Cebuano", "ceb"),
    ("Chichewa", "nya"),
    ("Chinese (Simplified)", "chi_sim"),
    ("Chinese (Traditional)", "chi_tra"),
    ("Corsican", "cos"),
    ("Croatian", "hrv"),
    ("Czech", "ces"),
    ("Danish", "dan"),
    ("Dutch", "nld"),
    ("Esperanto", "epo"),
    ("Estonian", "est"),
    ("Filipino", "fil"),
    ("Finnish", "fin"),
    ("French", "fra"),
    ("Frisian", "fry"),
    ("Galician", "glg"),
    ("Georgian", "kat"),
    ("German", "deu"),
    ("Greek", "ell"),
    ("Gujarati", "guj"),
    ("Haitian Creole", "hat"),
    ("Hausa", "hau"),
    ("Hawaiian", "haw"),
    ("Hebrew", "heb"),
    ("Hindi", "hin"),
    ("Hmong", "hmn"),
    ("Hungarian", "hun"),
    ("Icelandic", "isl"),
    ("Igbo", "ibo"),
    ("Indonesian", "ind"),
    ("Irish", "gle"),
    ("Italian", "ita"),
    ("Japanese", "jpn"),
    ("Javanese", "jav"),
    ("Kannada", "kan"),
    ("Kazakh", "kaz"),
    ("Khmer", "khm"),
    ("Kinyarwanda", "kin"),
    ("Korean", "kor"),
    ("Kurdish (Kurmanji)", "kur"),
    ("Kyrgyz", "kir"),
    ("Lao", "lao"),
    ("Latin", "lat"),
    ("Latvian", "lav"),
    ("Lithuanian", "lit"),
    ("Luxembourgish", "ltz"),
    ("Macedonian", "mkd"),
    ("Malagasy", "mlg"),
    ("Malay", "msa"),
    ("Malayalam", "mal"),
    ("Maltese", "mlt"),
    ("Maori", "mri"),
    ("Marathi", "mar"),
    ("Mongolian", "mon"),
    ("Myanmar (Burmese)", "mya"),
    ("Nepali", "nep"),
    ("Norwegian", "nor"),
    ("Odia (Oriya)", "ori"),
    ("Pashto", "pus"),
    ("Persian", "fas"),
    ("Polish", "pol"),
    ("Portuguese", "por"),
    ("Punjabi", "pan"),
    ("Romanian", "ron"),
    ("Russian", "rus"),
    ("Samoan", "smo"),
    ("Scots Gaelic", "gla"),
    ("Serbian", "srp"),
    ("Sesotho", "sot"),
    ("Shona", "sna"),
    ("Sindhi", "snd"),
    ("Sinhala", "sin"),
    ("Slovak", "slk"),
    ("Slovenian", "slv"),
    ("Somali", "som"),
    ("Spanish", "spa"),
    ("Sundanese", "sun"),
    ("Swahili", "swa"),
    ("Swedish", "swe"),
    ("Tajik", "tgk"),
    ("Tamil", "tam"),
    ("Tatar", "tat"),
    ("Telugu", "tel"),
    ("Thai", "tha"),
    ("Turkish", "tur"),
    ("Turkmen", "tuk"),
    ("Ukrainian", "ukr"),
    ("Urdu", "urd"),
    ("Uyghur", "uig"),
    ("Uzbek", "uzb"),
    ("Vietnamese", "vie"),
    ("Welsh", "cym"),
    ("Xhosa", "xho"),
    ("Yiddish", "yid"),
    ("Yoruba", "yor"),
    ("Zulu", "zul"),
)

# Mapping of display name -> language code, in the insertion order above.
languages = dict(_LANGUAGE_PAIRS)
# Language picker: the options are the display names from `languages`, in
# their defined order, with the first entry preselected.
selected_language = st.selectbox("Select Language", list(languages), index=0)

# Run OCR only when the user presses Submit.
if st.button("Submit"):
    if image is None:
        # Guard clause: nothing was uploaded, so there is nothing to read.
        st.warning("Please upload an image.")
    else:
        try:
            # Decode into a separate name so `image` keeps referring to the
            # upload handle instead of being rebound to a PIL image.
            uploaded_image = Image.open(image)
            result = tesseract_ocr(uploaded_image, languages[selected_language])
        except pytesseract.TesseractNotFoundError:
            # Must precede the OSError handler: it is an OSError subclass.
            st.error("Tesseract is not installed or could not be found on this server.")
        except pytesseract.TesseractError as exc:
            # Raised when the Tesseract process fails, e.g. when the language
            # data for the selected language is not installed.
            st.error(f"OCR failed for {selected_language}: {exc}")
        except OSError as exc:
            # Covers files Pillow cannot identify or read as an image.
            st.error(f"Could not read the uploaded file as an image: {exc}")
        else:
            st.subheader("Output:")
            st.text(result)