"""Streamlit app: handwritten-text OCR with Microsoft's TrOCR model."""

import streamlit as st
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import torch

# Hugging Face checkpoint shared by the processor and the model.
MODEL_NAME = "microsoft/trocr-base-handwritten"

# Page configuration
st.set_page_config(page_title="OCR Manuscrit avec TrOCR", layout="centered")
st.title("✍️ OCR de texte manuscrit avec TrOCR")
st.write("Chargez une image contenant du texte manuscrit pour en extraire le contenu.")


# Load the handwriting TrOCR model
@st.cache_resource
def load_model():
    """Load the TrOCR processor and model for handwritten text.

    Decorated with ``st.cache_resource`` so the checkpoint is not reloaded
    on every Streamlit script rerun.

    Returns:
        A ``(processor, model)`` tuple: the ``TrOCRProcessor`` and the
        ``VisionEncoderDecoderModel`` loaded from ``MODEL_NAME``.
    """
    processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
    model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
    return processor, model


processor, model = load_model()

# Image upload
uploaded_file = st.file_uploader(
    "📤 Charger une image manuscrite (format .png ou .jpg)",
    type=["png", "jpg", "jpeg"],
)

if uploaded_file is not None:
    try:
        # Convert to RGB so grayscale/RGBA/palette uploads share one mode.
        image = Image.open(uploaded_file).convert("RGB")
    except OSError:
        # Pillow raises OSError (incl. UnidentifiedImageError) for files it
        # cannot decode; report it instead of crashing with a traceback.
        st.error("❌ Impossible de lire cette image. Veuillez charger un fichier PNG ou JPEG valide.")
        st.stop()

    # NOTE(review): recent Streamlit releases deprecate `use_column_width`;
    # confirm the deployed Streamlit version before switching parameters.
    st.image(image, caption="🖼️ Image chargée", use_column_width=True)

    if st.button("🔍 Lancer la reconnaissance"):
        with st.spinner("Reconnaissance en cours..."):
            # Preprocess the image into the tensor the encoder expects.
            pixel_values = processor(images=image, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values)
            # A single image was passed, so take the first decoded sequence.
            generated_text = processor.batch_decode(
                generated_ids, skip_special_tokens=True
            )[0]

        st.success("✅ Texte reconnu :")
        st.text_area("📝 Résultat OCR", generated_text, height=150)