Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| import re | |
| from datetime import datetime | |
| import fitz # PyMuPDF | |
| import plotly.express as px | |
| import io | |
def extract_data_from_pdf(pdf_bytes):
    """Extract temperature readings from a sterilization protocol PDF.

    Each page is scanned for four metadata fields ("Entreprise::", "Date:",
    "Utilisateur:", "Produit:") and for reading lines made of an elapsed-time
    label, two temperatures in °C and an F value written with a decimal comma.
    Pages missing any metadata field are skipped.

    Args:
        pdf_bytes: Raw content of the PDF file.

    Returns:
        A pandas DataFrame with one row per reading and the columns
        'Date' (ISO ``YYYY-MM-DD`` string), 'Entreprise', 'Utilisateur',
        'Produit', 'Déroulement', 'Temp. du stérilisateur', 'Temp. à coeur'
        and 'Valeur F'. The columns are present even when nothing was
        extracted, so callers can always select them.

    Raises:
        ValueError: If a page with readings carries a date that matches the
            ``dd.mm.yyyy`` shape but is not a valid calendar date.
    """
    columns = [
        'Date',
        'Entreprise',
        'Utilisateur',
        'Produit',
        'Déroulement',
        'Temp. du stérilisateur',
        'Temp. à coeur',
        'Valeur F',
    ]

    # One reading line: time label ("Début" or "+ N Min."), sterilizer
    # temperature, core temperature, F value. "Début" is matched literally;
    # writing it as "\Début" would turn "\D" into the non-digit class.
    pattern = re.compile(
        r'(\+\s\d+\sMin\.|Début)\s*(\d+°C)\s*(\d+°C)\s*(\d{2},\d{2})'
    )

    # The context manager closes the document even if text extraction fails.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf_document:
        extracted_texts = [page.get_text() for page in pdf_document]

    structured_data = []
    for page_text in extracted_texts:
        # NOTE(review): the company label is matched with a double colon,
        # unlike the other labels - presumably that is how the PDF prints it;
        # confirm against a sample document.
        company_match = re.search(r'Entreprise::\s*(.+)', page_text)
        date_match = re.search(r'Date:\s*(\d{2}[./]\d{2}[./]\d{4})', page_text)
        user_match = re.search(r'Utilisateur:\s*(.+)', page_text)
        product_match = re.search(r'Produit:\s*(.+)', page_text)
        if not all([company_match, date_match, user_match, product_match]):
            continue  # Skip pages without complete metadata

        matches = pattern.findall(page_text)
        if not matches:
            continue

        company = company_match.group(1).strip()
        user = user_match.group(1).strip()
        product = product_match.group(1).strip()

        # Normalize the date once per page instead of once per reading.
        raw_date = date_match.group(1).replace('/', '.').strip()
        iso_date = datetime.strptime(raw_date, '%d.%m.%Y').strftime('%Y-%m-%d')

        for time, sterilizer_temp, core_temp, f_value in matches:
            structured_data.append({
                'Date': iso_date,
                'Entreprise': company,
                'Utilisateur': user,
                'Produit': product,
                'Déroulement': time.strip(),
                'Temp. du stérilisateur': float(sterilizer_temp.replace('°C', '')),
                'Temp. à coeur': float(core_temp.replace('°C', '')),
                'Valeur F': float(f_value.replace(',', '.')),
            })

    return pd.DataFrame(structured_data, columns=columns)
def analyze_sterilization(data):
    """Summarize, per product, whether the sterilization criteria were met.

    For every distinct 'Produit' the function counts the readings whose core
    temperature is at or above the required temperature (108 for products
    whose name contains 'NutaBreizh', 103 otherwise) and flags the product as
    compliant when that count is at least 30.

    Args:
        data: DataFrame of readings with at least the columns 'Produit',
            'Date', 'Utilisateur', 'Temp. du stérilisateur' and
            'Temp. à coeur'. An empty frame (even one without columns) is
            accepted.

    Returns:
        A DataFrame with one row per product and the columns 'Date',
        'Produit', 'Utilisateur', 'Temperature_Requise',
        'Minutes_Temperature_Requise', 'Temperature_Max_Sterilisateur',
        'Temperature_Max_Coeur' and 'Criteres_Respectes'. The columns are
        present even when there is nothing to report.
    """
    result_columns = [
        'Date',
        'Produit',
        'Utilisateur',
        'Temperature_Requise',
        'Minutes_Temperature_Requise',
        'Temperature_Max_Sterilisateur',
        'Temperature_Max_Coeur',
        'Criteres_Respectes',
    ]

    # Nothing extracted: return a correctly shaped empty result instead of
    # failing on the missing 'Produit' column.
    if data.empty:
        return pd.DataFrame(columns=result_columns)

    results = []
    # NOTE(review): readings are grouped by product only, so several runs of
    # the same product in one file are counted together and reported under
    # the first run's date - confirm this is intended.
    for product, group in data.groupby('Produit'):
        required_temp = 108 if 'NutaBreizh' in product else 103
        core_temps = group['Temp. à coeur']

        # Number of readings at or above the required core temperature.
        # Presumably one reading per minute, which is why the count is
        # reported as minutes; the readings are not checked for contiguity.
        minutes_at_temp = int((core_temps >= required_temp).sum())

        results.append({
            'Date': group['Date'].iloc[0],
            'Produit': product,
            'Utilisateur': group['Utilisateur'].iloc[0],
            'Temperature_Requise': required_temp,
            'Minutes_Temperature_Requise': minutes_at_temp,
            'Temperature_Max_Sterilisateur': group['Temp. du stérilisateur'].max(),
            'Temperature_Max_Coeur': core_temps.max(),
            # Criterion: at least 30 readings at the required temperature.
            'Criteres_Respectes': minutes_at_temp >= 30,
        })

    return pd.DataFrame(results, columns=result_columns)
def main():
    """Run the Streamlit page: upload a PDF, analyze it, show and export results.

    The uploaded PDF is parsed with ``extract_data_from_pdf`` and summarized
    with ``analyze_sterilization``. The page then shows the result table, a
    warning plus the list of non-compliant products (if any), a scatter plot
    of minutes at the required temperature per date, and a two-step Excel
    export. When no usable data is found, a warning is displayed instead.
    """
    st.title("Analyse des Protocoles de Stérilisation")
    uploaded_file = st.file_uploader("Choisir un fichier PDF", type="pdf")
    if uploaded_file is None:
        return

    # Read the uploaded file as bytes and extract the readings.
    data = extract_data_from_pdf(uploaded_file.getvalue())

    # Guard before analyzing: an empty extraction may carry no columns at
    # all, and both the analysis and the column lookups below would raise
    # KeyError on it instead of reaching the warning.
    results_df = analyze_sterilization(data) if not data.empty else pd.DataFrame()
    if results_df.empty:
        st.warning("Aucune donnée valide à afficher.")
        return

    st.subheader("Résultats de l'analyse")
    st.dataframe(results_df)

    # Warn about, and list, the products that did not meet the criteria.
    failed_products = results_df[~results_df['Criteres_Respectes'].astype(bool)]
    if not failed_products.empty:
        st.warning("Attention : Certains produits n'ont pas respecté les critères de stérilisation.")
        st.subheader("Produits n'ayant pas respecté les critères")
        st.dataframe(failed_products)

    fig = px.scatter(
        results_df,
        x='Date',
        y='Minutes_Temperature_Requise',
        color='Criteres_Respectes',
        hover_data=['Produit', 'Temperature_Requise'],
        title="Minutes à température requise par production",
    )
    st.plotly_chart(fig)

    # The workbook is only built once the user asks for the export.
    if st.button("Exporter en Excel"):
        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            results_df.to_excel(writer, index=False)
        output.seek(0)
        st.download_button(
            label="Télécharger l'analyse",
            data=output,
            file_name="analyse_sterilisation.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
# Script entry point: build the page when the file is executed as a script.
if __name__ == "__main__":
    main()