Spaces:
Running
Running
| import streamlit as st | |
| import streamlit.components.v1 as components | |
| import base64 | |
| import tempfile | |
| import os | |
| from mistralai import Mistral | |
| from PIL import Image | |
| import io | |
| import json | |
| import pandas as pd | |
| from typing import List, Tuple, Dict, Any | |
| from dotenv import load_dotenv | |
| from pdf2image import convert_from_bytes | |
| from langfuse import propagate_attributes, get_client | |
# Module-level configuration: environment, tracing client and Mistral client.
load_dotenv()
langfuse = get_client()

MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")
MODEL_ID = "mistral-large-latest"

# Langfuse tracing attributes (attached to every model call made by this app).
AGENT_URL = os.environ.get("AGENT_URL", st.context.url)
SESSION_ID = (
    st.context.cookies["ajs_anonymous_id"]
    if "ajs_anonymous_id" in st.context.cookies
    else ""
)
DEFAULT_USER_ID = (
    st.query_params["user_id"] if "user_id" in st.query_params else ""
)

# Reject an unset *or empty* key: an empty value (e.g. ``MISTRAL_API_KEY=`` in
# a .env file) would otherwise pass this check and only fail on the first
# request to the API.
if not MISTRAL_API_KEY:
    raise RuntimeError("Merci de renseigner la variable d'environnement MISTRAL_API_KEY.")

client = Mistral(api_key=MISTRAL_API_KEY)
def load_image_from_upload(uploaded_file) -> Image.Image:
    """Decode an uploaded file into an RGB image.

    Args:
        uploaded_file: File-like object exposing ``read()``; its whole
            content is consumed.

    Returns:
        The decoded image converted to the ``RGB`` mode.
    """
    payload = uploaded_file.read()
    decoded = Image.open(io.BytesIO(payload))
    return decoded.convert("RGB")
def center_crop_to_square(img: Image.Image) -> Image.Image:
    """Crop ``img`` to the largest centered square.

    Args:
        img: Source image; only its ``size`` and ``crop`` are used.

    Returns:
        ``img`` itself when it is already square, otherwise a crop whose
        side equals the shorter dimension, centered along the longer one.
    """
    width, height = img.size
    if width == height:
        return img

    side = min(width, height)
    # Exactly one of these offsets is non-zero: the one along the longer axis.
    left = (width - side) // 2
    top = (height - side) // 2
    return img.crop((left, top, left + side, top + side))
def resize_for_vlm(img: Image.Image, max_size: int = 1024) -> Image.Image:
    """Downscale ``img`` so that neither side exceeds ``max_size``.

    The aspect ratio is preserved and images are never upscaled.

    Args:
        img: Source image.
        max_size: Maximum allowed width and height, in pixels.

    Returns:
        ``img`` itself when it already fits, otherwise a resized copy
        produced with the LANCZOS filter.
    """
    width, height = img.size
    scale = min(max_size / width, max_size / height, 1.0)
    if scale == 1.0:
        return img

    # int() truncates, so a very elongated image (e.g. 1 x 5000) would end up
    # with a zero dimension, which is not a valid resize target. Keep >= 1 px.
    new_w = max(1, int(width * scale))
    new_h = max(1, int(height * scale))
    return img.resize((new_w, new_h), Image.LANCZOS)
def stack_images_vertically(images: List[Image.Image]) -> Image.Image:
    """Paste the given images one under another into a single RGB image.

    The width of the first image is the reference: any image with a
    different width is resized to it (height scaled proportionally) before
    being pasted.

    Args:
        images: Images to stack, top to bottom.

    Returns:
        A new ``RGB`` image whose height is the sum of the (resized) heights.

    Raises:
        ValueError: If ``images`` is empty.
    """
    if not images:
        raise ValueError("Aucune page n'a été convertie en image.")

    target_width = images[0].size[0]

    def _fit_width(page):
        # Leave pages that already have the reference width untouched.
        page_w, page_h = page.size
        if page_w == target_width:
            return page
        scaled_h = int(target_width * (page_h / page_w))
        return page.resize((target_width, scaled_h), Image.LANCZOS)

    pages = [_fit_width(page) for page in images]
    canvas_height = sum(page.size[1] for page in pages)
    canvas = Image.new('RGB', (target_width, canvas_height))

    top = 0
    for page in pages:
        canvas.paste(page, (0, top))
        top += page.size[1]
    return canvas
def uploaded_file_to_square_base64(uploaded_file) -> Tuple[str, str]:
    """Turn an uploaded image or PDF into a model-ready base64 data URL.

    * PDF: every page is rendered, converted to RGB and stacked vertically
      into one image (no square crop is applied in this case).
    * Any other upload: decoded as an image, converted to RGB and
      center-cropped to a square.

    In both cases the result is then downscaled to at most 1024 px per side.

    Args:
        uploaded_file: Upload object exposing ``type`` (MIME type string)
            and ``getvalue()`` (raw bytes).

    Returns:
        ``(mime_type, data_url)`` where ``mime_type`` is the MIME type of the
        encoded payload: ``"image/jpeg"`` for JPEG uploads, ``"image/png"``
        for everything else.
    """
    raw_bytes = uploaded_file.getvalue()

    if uploaded_file.type == "application/pdf":
        pages_rgb = [page.convert("RGB") for page in convert_from_bytes(raw_bytes)]
        img = stack_images_vertically(pages_rgb)
        mime_type = "image/png"
    else:
        img = Image.open(io.BytesIO(raw_bytes)).convert("RGB")
        img = center_crop_to_square(img)
        # image_to_base64_data_url() only keeps JPEG as JPEG and encodes every
        # other type as PNG. Normalise here so the returned MIME type matches
        # the data URL (previously a WEBP upload returned "image/webp"
        # alongside PNG-encoded data).
        mime_type = "image/jpeg" if uploaded_file.type == "image/jpeg" else "image/png"

    img = resize_for_vlm(img, max_size=1024)
    return mime_type, image_to_base64_data_url(img, mime_type=mime_type)
def image_to_base64_data_url(img: Image.Image, mime_type: str = "image/png") -> str:
    """Encode ``img`` as a base64 ``data:`` URL.

    Args:
        img: Image to serialise; only its ``save`` method is used.
        mime_type: ``"image/jpeg"`` selects JPEG output (quality 90); any
            other value falls back to PNG.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>`` where
        ``<mime>`` is ``image/jpeg`` or ``image/png`` depending on the
        encoding actually used.
    """
    as_jpeg = mime_type == "image/jpeg"
    out_mime = "image/jpeg" if as_jpeg else "image/png"
    save_options = {"format": "JPEG", "quality": 90} if as_jpeg else {"format": "PNG"}

    with io.BytesIO() as sink:
        img.save(sink, **save_options)
        payload = base64.b64encode(sink.getvalue()).decode("utf-8")
    return f"data:{out_mime};base64,{payload}"
def _build_invoice_schema(languages: List[str]) -> Dict[str, Any]:
    """Return the JSON schema the model reply must follow for the given languages."""
    return {
        "type": "object",
        "properties": {
            "csv_tables": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Each item is a CSV string representing one table found in the image corresponding to the invoice item."
            },
            "supplier": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Get information about the supplier (name, location, SIRET, etc.) if present in the invoice. Return up to 5 key bullet points about the supplier."
            },
            "taxes": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Up to 5 taxes present in the invoice (must be in rate of 0, 2.1, 5.5, 10 or 20)."
            },
            "anomalies": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Any anomalies, outliers, or surprising patterns you detect."
            },
            "translations": {
                "type": "object",
                # One string property per requested language code.
                "properties": {
                    lang: {"type": "string"} for lang in languages
                },
                "description": "Short high-level summaries in the selected languages."
            },
        },
        "required": ["csv_tables", "supplier", "taxes"],
        "additionalProperties": False,
    }


def _parse_model_json(content: Any) -> Dict[str, Any]:
    """Decode the model reply as JSON, tolerating extra text around the object."""
    error_message = f"Model did not return valid JSON. Raw content:\n{content}"
    # A missing or non-text reply cannot be parsed; report it the same way as
    # malformed JSON instead of letting a TypeError escape from json.loads().
    if not isinstance(content, str):
        raise ValueError(error_message)
    try:
        return json.loads(content)
    except json.JSONDecodeError as first_error:
        # Fallback: keep only the outermost ``{ ... }`` span, which drops any
        # text the model may have put before or after the JSON object.
        start = content.find("{")
        end = content.rfind("}")
        if start == -1 or end < start:
            raise ValueError(error_message) from first_error
        try:
            return json.loads(content[start:end + 1])
        except json.JSONDecodeError as second_error:
            raise ValueError(error_message) from second_error


def call_mistral_large_multimodal(
    mime_type: str,
    image_data_url: str,
    user_instruction: str,
    languages: List[str],
) -> Dict[str, Any]:
    """Send the invoice image to the Mistral chat model and return its parsed JSON reply.

    The call is recorded as a Langfuse "generation" observation, tagged with
    the module-level user id, session id and app URL.

    Args:
        mime_type: MIME type of the image. Not read by this function; kept
            for interface compatibility with existing callers.
        image_data_url: Image passed to the model as an ``image_url`` chunk
            (the visible caller supplies a base64 ``data:`` URL).
        user_instruction: User prompt; when empty, a default English
            instruction is sent instead.
        languages: Language codes for which a summary is requested under the
            ``translations`` key of the reply.

    Returns:
        The decoded JSON reply (schema keys: ``csv_tables``, ``supplier``,
        ``taxes``, ``anomalies``, ``translations``).

    Raises:
        ValueError: If the reply is missing or cannot be decoded as JSON.
            Errors raised by the Mistral client itself are not caught here.
    """
    with langfuse.start_as_current_observation(
        as_type="generation",
        name="mistral_multimodal_ocr_invoice_analysis",
        model=MODEL_ID,
    ) as root_span:
        with propagate_attributes(user_id=DEFAULT_USER_ID, session_id=SESSION_ID, metadata={"app_url": AGENT_URL}):
            json_schema = _build_invoice_schema(languages)
            system_prompt = (
                "You are a Multimodal Intelligence OCR and invoice classification for french farmer using Mistral Large 3.\n"
                "You are given a single document-like image (e.g. chart + table, financial report page).\n\n"
                "Your tasks:\n"
                "1. Read all visible text and numbers directly from the image.\n"
                "2. Reconstruct any clearly visible tables into valid CSV strings.\n"
                " - Use the first row as headers when possible.\n"
                " - Use commas as separators and newline per row.\n"
                "3. Collect information about the supplier of the invoice.\n"
                "4. Collect any taxes detected in the invoice (must be in rate of 0, 2.1, 5.5, 10 or 20) and make a table resume.\n"
                "5. Detect any anomalies or surprising patterns if present (else return an empty list).\n"
                "6. Provide short summaries in the requested languages.\n\n"
                "You MUST respond ONLY with a JSON object that matches the provided JSON schema.\n"
                " Most of the time, the pattern of a purchase invoice is composed of supplier informations, invoice informations and one or many invoice lines.\n"
                "for the items, try to detect the role of the item in 'merchandise' or 'service' in role attribute.\n"
                "for all the date, try to convert it in the following format : 'DD/MM/YYYY'\n"
                "for the items, try to classify it like an accountant in nature attribute.\n"
                "Do not include any extra commentary outside of the JSON.\n"
                "Response must be in French language."
            )
            messages = [
                {
                    "role": "system",
                    "content": [
                        {"type": "text", "text": system_prompt},
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": user_instruction or "Analyze this invoice report page.",
                        },
                        {
                            "type": "image_url",
                            "image_url": image_data_url,
                        },
                    ],
                },
            ]
            response = client.chat.complete(
                model=MODEL_ID,
                messages=messages,
                temperature=0.2,
                max_tokens=2048,
                response_format={
                    "type": "json_schema",
                    "json_schema": {
                        "name": "multimodal_intel_eval",
                        "schema": json_schema,
                        "strict": True,
                    },
                },
            )
            content = response.choices[0].message.content
            # Record input/output on the trace before parsing, so the raw
            # reply is kept even when it turns out not to be valid JSON.
            # TODO: also report token usage via usage_details
            # (response.usage.prompt_tokens / response.usage.completion_tokens).
            root_span.update(
                input=messages,
                output=content,
            )
            return _parse_model_json(content)
# Streamlit page: upload form on the left, translation options on the right,
# then (after "Lancer") the model call and the rendering of its results.
# NOTE(review): the source this block was recovered from had lost its
# indentation. The nesting below (what lives inside each column, Anomalies
# rendered full-width under the two result columns) is reconstructed from the
# statement order - confirm against the deployed app.

# Page configuration - MUST be the first Streamlit command.
st.set_page_config(page_title="OCR Facture avec Mistral Large 3", layout="wide")
st.title("OCR Facture achat agricole avec Mistral")
st.caption("Powered by **Mistral Large 3**")


def _render_bullets(title: str, items, empty_message: str) -> None:
    """Render ``items`` as a markdown bullet list under ``title``, or ``empty_message`` if empty."""
    st.subheader(title)
    if items:
        for item in items:
            st.markdown(f"- {item}")
    else:
        st.write(empty_message)


col_left, col_right = st.columns([2, 1])

with col_left:
    uploaded_file = st.file_uploader(
        "Charger une image ou un document (PNG, JPG, WEBP, ou PDF)",
        type=["png", "jpg", "jpeg", "webp", "pdf"],
    )
    default_prompt = (
        "Donne moi un analyse du fournisseur et des taxes présentes de cet element, detecte les anomalies potentielles et exporte moi les lignes de factures en CSV "
        "Realise egalement une classification des lignes en 'produit' ou 'service' et une classification comptable en nature pour chaque ligne. "
    )
    user_instruction = st.text_area(
        "Instruction pour Mistral Large 3",
        value=default_prompt,
        height=120,
    )

with col_right:
    st.subheader("Options Traduction")
    languages = st.multiselect(
        "Traductions complémentaires",
        options=["fr", "de", "es", "hi", "zh", "ja", "en"],
        default=["en"],
        help="Mistral Large 3 supporte plusieurs langues.",
        label_visibility="collapsed",
    )

run_button = st.button("Lancer", type="primary")

if run_button:
    if uploaded_file is None:
        st.error("Merci de charger un fichier image ou PDF.")
        st.stop()

    prep_msg = (
        "Préparation du PDF (combinaison de toutes les pages)..."
        if uploaded_file.type == "application/pdf"
        else "Préparation de l'image..."
    )
    with st.spinner(prep_msg):
        mime_type, data_url = uploaded_file_to_square_base64(uploaded_file)

    # Preview exactly what is sent to the model: decode the payload part of
    # the data URL (everything after the first comma).
    b64_part = data_url.split(",", 1)[1]
    img_bytes = base64.b64decode(b64_part)
    st.image(img_bytes, caption="Image centrée et redimensionnée pour le modèle", width=400)

    with st.spinner("En cours..."):
        try:
            result = call_mistral_large_multimodal(
                mime_type=mime_type,
                image_data_url=data_url,
                user_instruction=user_instruction,
                languages=languages,
            )
        except Exception as e:
            # UI boundary: show any failure to the user and end this run.
            st.error(f"Erreur Mistral: {e}")
            st.stop()

    st.header("Resultats de l'analyse")

    csv_tables = result.get("csv_tables", [])
    if csv_tables:
        st.subheader("Tables (CSV)")
        for i, csv_str in enumerate(csv_tables):
            st.markdown(f"**Table {i+1}**")
            try:
                df = pd.read_csv(io.StringIO(csv_str))
                st.dataframe(df, use_container_width=True)
            except Exception:
                # Best effort: if the CSV cannot be parsed, show it as raw text.
                st.text_area(f"CSV for Table {i+1}", value=csv_str, height=150)
            st.download_button(
                label=f"Télécharger Table {i+1} en CSV",
                data=csv_str,
                file_name=f"table_{i+1}.csv",
                mime="text/csv",
                key=f"csv_download_{i}",
            )
    else:
        st.info("Aucune table n'a été détectée.")

    supplier = result.get("supplier", [])
    taxes = result.get("taxes", [])
    anomalies = result.get("anomalies", [])

    col_ins, col_risk = st.columns(2)
    with col_ins:
        _render_bullets("Fournisseur", supplier, "_No explicit supplier returned._")
    with col_risk:
        _render_bullets("Taxes", taxes, "_No explicit taxes returned._")
    _render_bullets("Anomalies", anomalies, "_No anomalies reported._")

    translations = result.get("translations", {}) or {}
    if translations:
        st.subheader(" Résumés en plusieurs langues")
        for lang_code, summary in translations.items():
            with st.expander(f"Résumé en {lang_code}"):
                st.write(summary)
    else:
        st.info("Aucun résumé multilingue n'a été demandé ou retourné.")