import functools

import numpy as np
import streamlit as st
from PIL import Image
import easyocr
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
| |
@functools.lru_cache(maxsize=1)
def _get_reader():
    """Build the EasyOCR English reader once per process.

    Reader construction loads detection/recognition model weights, so it is
    far too expensive to repeat on every call (and every Streamlit rerun).
    """
    return easyocr.Reader(['en'])


def extract_text(image):
    """Run OCR on an image and return all recognized text joined by spaces.

    Args:
        image: a PIL.Image.Image. It is converted to a numpy array because
            ``Reader.readtext`` accepts file paths, bytes, URLs, or numpy
            arrays — not PIL Image objects.

    Returns:
        str: space-joined text of every detected region; '' if none found.
    """
    reader = _get_reader()
    # readtext yields (bbox, text, confidence) tuples; keep only the text.
    results = reader.readtext(np.array(image))
    return ' '.join(text for _, text, _ in results)
|
|
| |
def explain_text(text, tokenizer, model):
    """Generate a short explanation of *text* with a causal language model.

    Args:
        text: prompt text; truncated to 256 input tokens before generation.
        tokenizer: Hugging Face tokenizer paired with *model*.
        model: a causal LM exposing ``generate()``.

    Returns:
        str: only the newly generated continuation, decoded to text.
    """
    input_ids = tokenizer.encode(text, return_tensors="pt", max_length=256, truncation=True)
    # Use max_new_tokens, not max_length: with max_length=100 a prompt of up
    # to 256 tokens already exceeds the budget and generation produces
    # nothing new (or errors outright).
    output_ids = model.generate(input_ids, max_new_tokens=100, num_return_sequences=1)
    # Causal LMs echo the prompt at the start of the output sequence;
    # decode only the tokens generated after the prompt.
    new_tokens = output_ids[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
|
|
st.title("Text Extraction and Explanation")


@st.cache_resource
def load_model():
    """Load the Zephyr tokenizer and model once per server process.

    Streamlit re-executes this whole script on every user interaction;
    without caching, the 7B model would be reloaded on each rerun. Loading
    is also deferred until an image actually needs an explanation.
    """
    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha")
    model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-alpha")
    return tokenizer, model


# Restrict uploads to formats PIL can open; anything else fails in Image.open.
uploaded_file = st.file_uploader(
    "Upload an image:", type=["png", "jpg", "jpeg", "bmp", "webp"]
)

if uploaded_file is not None:
    image = Image.open(uploaded_file)

    with st.spinner("Extracting text..."):
        extracted_text = extract_text(image)

    st.markdown("**Extracted text:**")
    st.markdown(extracted_text)

    if extracted_text.strip():
        tokenizer, model = load_model()
        with st.spinner("Generating explanation..."):
            explanation = explain_text(extracted_text, tokenizer, model)
        st.markdown("**Explanation:**")
        st.markdown(explanation)
    else:
        # Don't prompt the LLM with an empty string when OCR found nothing.
        st.markdown("No text was detected in the image.")
else:
    st.markdown("Please upload an image to extract text and get an explanation.")