| | import streamlit as st |
| | from transformers import TrOCRProcessor, VisionEncoderDecoderModel |
| | from PIL import Image |
| | import torch |
| |
|
| | |
# Page chrome: browser-tab title and layout first, then the visible
# heading and the one-line usage hint shown above the uploader.
st.set_page_config(
    page_title="OCR Manuscrit avec TrOCR",
    layout="centered",
)

st.title("✍️ OCR de texte manuscrit avec TrOCR")
st.write(
    "Chargez une image contenant du texte manuscrit pour en extraire le contenu."
)
| |
|
| | |
@st.cache_resource
def load_model(model_name: str = "microsoft/trocr-base-handwritten"):
    """Load the TrOCR processor and model for one checkpoint.

    The function is wrapped in ``st.cache_resource``, so Streamlit keeps the
    returned objects and reuses them for later calls with the same argument
    instead of loading the checkpoint again.

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls, so
            the processor and the model always come from the same
            checkpoint. Defaults to ``"microsoft/trocr-base-handwritten"``.

    Returns:
        A ``(processor, model)`` tuple: the ``TrOCRProcessor`` and the
        ``VisionEncoderDecoderModel`` loaded from ``model_name``.
    """
    processor = TrOCRProcessor.from_pretrained(model_name)
    model = VisionEncoderDecoderModel.from_pretrained(model_name)
    return processor, model
| |
|
# Obtain the processor/model pair used by the recognition step below.
processor, model = load_model()

uploaded_file = st.file_uploader(
    "📤 Charger une image manuscrite (format .png ou .jpg)",
    type=["png", "jpg", "jpeg"],
)

if uploaded_file is not None:
    try:
        # Opening and converting are guarded together: a file with an image
        # extension can still be unreadable or truncated.
        image = Image.open(uploaded_file).convert("RGB")
    except OSError:
        # Pillow's UnidentifiedImageError is an OSError subclass, so this
        # covers both "not an image" and I/O failures while decoding.
        # Report it in the page and end this script run instead of letting
        # the exception surface as a traceback.
        st.error("❌ Impossible de lire ce fichier comme une image.")
        st.stop()

    st.image(image, caption="🖼️ Image chargée", use_column_width=True)

    if st.button("🔍 Lancer la reconnaissance"):
        with st.spinner("Reconnaissance en cours..."):
            # Preprocess the image into the tensor the model consumes,
            # generate token ids, then decode them back to text.
            pixel_values = processor(images=image, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values)
            # A single image was passed in, so only the first decoded
            # string is used.
            generated_text = processor.batch_decode(
                generated_ids, skip_special_tokens=True
            )[0]

        st.success("✅ Texte reconnu :")
        st.text_area("📝 Résultat OCR", generated_text, height=150)
| |
|
| |
|