| import streamlit as st |
| from PIL import Image |
| import pytesseract |
|
|
def tesseract_ocr(image, language):
    """Run Tesseract OCR on *image* and return the recognized text.

    A Streamlit spinner is displayed while recognition is in progress.

    Args:
        image: The image handed to ``pytesseract.image_to_string``.
        language: Language code forwarded to Tesseract as ``lang``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the image.
    """
    with st.spinner("Performing OCR..."):
        return pytesseract.image_to_string(image, lang=language)
|
|
# CSS injected below to hide the elements matched by `#MainMenu` and `footer`.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""

# Page heading, followed by a style rule that centers and colors every <h1>.
st.title("AIconvert OCR")
st.markdown(
    '<style>h1{color: crimson; text-align: center;}</style>',
    unsafe_allow_html=True,
)
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Upload widget; the value is None until the user has provided a file.
image = st.file_uploader("Upload Image", type=["jpg", "png", "jpeg"])
# Maps the display name shown in the language selector to the language code
# passed to Tesseract via ``lang=``. Insertion order is the order shown in the
# UI, so "English" stays first (it is the default selection).
#
# NOTE(review): a code only works if the matching ``<code>.traineddata`` file
# is installed. Several codes below (e.g. "hau", "haw", "hmn", "ibo", "kin",
# "mlg", "nya", "smo", "sna", "som", "sot", "tuk", "xho", "zul") do not appear
# in the official Tesseract language list -- confirm custom data is available
# or expect OCR to fail for those selections.
languages = {
    "English": "eng",
    "Arabic": "ara",
    "Afrikaans": "afr",
    "Albanian": "sqi",
    "Amharic": "amh",
    "Armenian": "hye",
    "Azerbaijani": "aze",
    "Basque": "eus",
    "Belarusian": "bel",
    "Bengali": "ben",
    "Bosnian": "bos",
    "Bulgarian": "bul",
    "Catalan": "cat",
    "Cebuano": "ceb",
    "Chichewa": "nya",
    "Chinese (Simplified)": "chi_sim",
    "Chinese (Traditional)": "chi_tra",
    "Corsican": "cos",
    "Croatian": "hrv",
    "Czech": "ces",
    "Danish": "dan",
    "Dutch": "nld",
    "Esperanto": "epo",
    "Estonian": "est",
    "Filipino": "fil",
    "Finnish": "fin",
    "French": "fra",
    "Frisian": "fry",
    "Galician": "glg",
    "Georgian": "kat",
    "German": "deu",
    "Greek": "ell",
    "Gujarati": "guj",
    "Haitian Creole": "hat",
    "Hausa": "hau",
    "Hawaiian": "haw",
    "Hebrew": "heb",
    "Hindi": "hin",
    "Hmong": "hmn",
    "Hungarian": "hun",
    "Icelandic": "isl",
    "Igbo": "ibo",
    "Indonesian": "ind",
    "Irish": "gle",
    "Italian": "ita",
    "Japanese": "jpn",
    "Javanese": "jav",
    "Kannada": "kan",
    "Kazakh": "kaz",
    "Khmer": "khm",
    "Kinyarwanda": "kin",
    "Korean": "kor",
    # Tesseract's code for Kurmanji Kurdish is "kmr" (not the generic "kur").
    "Kurdish (Kurmanji)": "kmr",
    "Kyrgyz": "kir",
    "Lao": "lao",
    "Latin": "lat",
    "Latvian": "lav",
    "Lithuanian": "lit",
    "Luxembourgish": "ltz",
    "Macedonian": "mkd",
    "Malagasy": "mlg",
    "Malay": "msa",
    "Malayalam": "mal",
    "Maltese": "mlt",
    "Maori": "mri",
    "Marathi": "mar",
    "Mongolian": "mon",
    "Myanmar (Burmese)": "mya",
    "Nepali": "nep",
    "Norwegian": "nor",
    "Odia (Oriya)": "ori",
    "Pashto": "pus",
    "Persian": "fas",
    "Polish": "pol",
    "Portuguese": "por",
    "Punjabi": "pan",
    "Romanian": "ron",
    "Russian": "rus",
    "Samoan": "smo",
    "Scots Gaelic": "gla",
    "Serbian": "srp",
    "Sesotho": "sot",
    "Shona": "sna",
    "Sindhi": "snd",
    "Sinhala": "sin",
    "Slovak": "slk",
    "Slovenian": "slv",
    "Somali": "som",
    "Spanish": "spa",
    "Sundanese": "sun",
    "Swahili": "swa",
    "Swedish": "swe",
    "Tajik": "tgk",
    "Tamil": "tam",
    "Tatar": "tat",
    "Telugu": "tel",
    "Thai": "tha",
    "Turkish": "tur",
    "Turkmen": "tuk",
    "Ukrainian": "ukr",
    "Urdu": "urd",
    "Uyghur": "uig",
    "Uzbek": "uzb",
    "Vietnamese": "vie",
    "Welsh": "cym",
    "Xhosa": "xho",
    "Yiddish": "yid",
    "Yoruba": "yor",
    "Zulu": "zul",
}
|
|
# Language picker: options are the display names in mapping order, with the
# first entry pre-selected.
selected_language = st.selectbox("Select Language", list(languages), index=0)
|
|
# Run OCR when the user presses "Submit". A missing upload produces a warning;
# failures while decoding the image or running Tesseract are reported in the
# page instead of surfacing as an unhandled traceback.
if st.button("Submit"):
    if image is None:
        st.warning("Please upload an image.")
    else:
        try:
            image = Image.open(image)
            result = tesseract_ocr(image, languages[selected_language])
        except (OSError, pytesseract.TesseractError) as exc:
            # OSError covers unreadable/corrupt images and a missing Tesseract
            # binary; TesseractError covers engine failures such as language
            # data that is not installed for the selected code.
            st.error(f"OCR failed: {exc}")
        else:
            st.subheader("Output:")
            st.text(result)
|
|