"""PDF2SLIDE: summarize each paragraph of a PDF and illustrate it.

Streamlit app that extracts paragraphs from an uploaded PDF, summarizes each
one with a BART summarization pipeline and requests an illustration of the
summary from the Hugging Face Inference API.
"""
import io
import os
import re

import fitz
import requests
import streamlit as st
from PIL import Image
from PyPDF2 import PdfReader
from transformers import pipeline

API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

# The API token is read from the environment instead of being committed to the
# source tree. Export HF_TOKEN before launching the app.
_hf_token = os.environ.get("HF_TOKEN")
headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

# Lower bound (in tokens) passed to the summarizer as ``min_length``.
MIN_SUMMARY_LENGTH = 10

# Streamlit re-runs the whole script on every interaction; cache the model so
# it is loaded once per process. Older Streamlit releases without
# ``cache_resource`` fall back to a plain (uncached) load.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_summarizer():
    """Build the summarization pipeline used by the app."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


summarizer = _load_summarizer()


def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the raw response body.

    Args:
        payload: JSON-serializable request body.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as bytes.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status (via ``raise_for_status``).
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Fail here rather than handing an error body to the image decoder.
    response.raise_for_status()
    return response.content


def _append_paragraph(paragraphs, parts):
    """Join ``parts`` with spaces and append the result if it is non-empty."""
    text = " ".join(parts).strip()
    if text:
        paragraphs.append(text)


def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
    """Split the text of a PDF into paragraphs using vertical gaps.

    Consecutive text blocks on a page are merged into one paragraph until the
    gap between the bottom of one block and the top of the next exceeds
    ``spacing_threshold``. Paragraphs never span pages.

    Args:
        pdf_data: Raw bytes of the PDF file.
        spacing_threshold: Vertical gap (in the page's coordinate units) above
            which a new paragraph starts.

    Returns:
        A list of non-empty paragraph strings. If extraction fails, the error
        is printed and the paragraphs collected so far are returned.
    """
    paragraphs = []
    try:
        pdf_stream = io.BytesIO(pdf_data)
        # The context manager closes the document even if a page fails.
        with fitz.open(stream=pdf_stream, filetype="pdf") as pdf_document:
            for page_number in range(pdf_document.page_count):
                page = pdf_document.load_page(page_number)
                parts = []
                previous_bottom = None
                for block in page.get_text("blocks"):
                    # Each block starts with (x0, y0, x1, y1, text).
                    top, bottom = block[1], block[3]
                    text = block[4]

                    # Vertical gap between this block and the previous one.
                    if previous_bottom is None:
                        vertical_spacing = 0
                    else:
                        vertical_spacing = top - previous_bottom

                    if vertical_spacing > spacing_threshold:
                        # Gap is large enough: close the current paragraph.
                        _append_paragraph(paragraphs, parts)
                        parts = [text]
                    else:
                        parts.append(text)
                    previous_bottom = bottom

                # Flush the last paragraph of the page.
                _append_paragraph(paragraphs, parts)
    except Exception as e:
        # Best-effort extraction: report the problem and return what we have.
        print(f"Erreur lors de l'extraction du PDF : {str(e)}")
    return paragraphs


st.title("PDF2SLIDE")
uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])

if uploaded_file is not None:
    pdf_data = uploaded_file.read()
    paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)

    for i, paragraph in enumerate(paragraphs, start=1):
        # ``max_length`` must be an integer and larger than ``min_length``.
        # The character count is only a loose proxy for the token count the
        # model actually uses.
        max_length = max(MIN_SUMMARY_LENGTH + 1, len(paragraph) // 2)
        summary = summarizer(
            paragraph,
            max_length=max_length,
            min_length=MIN_SUMMARY_LENGTH,
            do_sample=False,
            truncation=True,  # clip paragraphs longer than the model input
        )
        summary_text = summary[0]['summary_text']
        # Show the paragraph summary.
        st.text(f"Paragraphe {i}: {summary_text}")

        try:
            # Use the summary text as the image prompt.
            image_bytes = query({"inputs": summary_text})
            image = Image.open(io.BytesIO(image_bytes))
            image.load()  # force decoding so corrupt data is caught here
        except (requests.RequestException, OSError) as error:
            # A failed illustration should not abort the remaining paragraphs.
            st.warning(
                f"Impossible de générer l'image pour le paragraphe {i} : {error}"
            )
            continue
        st.image(image)