yannESGI committed on
Commit
1162522
·
1 Parent(s): 7c4e057

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -137
app.py DELETED
@@ -1,137 +0,0 @@
1
- from PyPDF2 import PdfReader
2
- import re
3
- import streamlit as st
4
- import fitz
5
- from transformers import pipeline
6
- import os
7
- import requests
8
- import io
9
- from PIL import Image
10
- from pptx import Presentation
11
- from pptx.util import Inches, Pt
12
- import tempfile
13
-
14
# Hugging Face Inference API endpoint for the Stable Diffusion v1.5 model.
API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

# Read the access token from the environment instead of embedding it in the
# source: a token committed to a repository is effectively public and should
# be revoked. Export HF_TOKEN before launching the app.
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}

# Summarization pipeline, built once at import time and reused for every call.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
17
-
18
def query(payload):
    """Send *payload* to the image-generation endpoint and return the raw body.

    Args:
        payload: JSON-serializable request body, e.g. ``{"inputs": "<prompt>"}``.

    Returns:
        The response body as bytes.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the whole app.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    # Fail here with a clear HTTP error rather than handing an error payload
    # to callers that expect image bytes.
    response.raise_for_status()
    return response.content
21
-
22
def add_line_breaks_to_summary(summary_text, line_length):
    """Wrap *summary_text* onto lines of at most *line_length* characters.

    Words are never split: a single word longer than *line_length* is placed
    on a line of its own and is allowed to exceed the limit. Runs of
    whitespace in the input are collapsed to single spaces.

    Args:
        summary_text: The text to wrap.
        line_length: Maximum number of characters per line.

    Returns:
        The wrapped text with lines joined by ``"\\n"``; an empty string when
        the input contains no words.
    """
    lines = []
    current_line = ""
    for word in summary_text.split():
        if not current_line:
            # The first word on a line needs no separating space and must be
            # accepted even when it is too long; otherwise an empty line
            # would be emitted ahead of it.
            current_line = word
        elif len(current_line) + 1 + len(word) <= line_length:
            # +1 accounts for the space between the words.
            current_line = f"{current_line} {word}"
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)
38
-
39
def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
    """Group the text blocks of a PDF into paragraphs using vertical gaps.

    Each page is read as a sequence of blocks whose first four items are the
    bounding box ``(x0, y0, x1, y1)`` and whose fifth item is the block text.
    A block whose top edge lies more than *spacing_threshold* below the
    bottom edge of the previous block starts a new paragraph; otherwise its
    text is appended to the current one. Paragraphs never span pages.

    Extraction is best-effort: on any failure the error is printed and the
    paragraphs collected so far are returned.

    Args:
        pdf_data: Raw bytes of the PDF file.
        spacing_threshold: Minimum vertical gap, in page coordinate units,
            that separates two paragraphs.

    Returns:
        A list of paragraph strings with surrounding whitespace stripped.
    """
    paragraphs = []

    try:
        # The context manager closes the document even when extraction fails
        # part-way through, which the explicit close() call did not.
        with fitz.open(stream=io.BytesIO(pdf_data), filetype="pdf") as pdf_document:
            for page_number in range(pdf_document.page_count):
                page = pdf_document.load_page(page_number)

                current_paragraph = ""
                previous_bottom = None

                for block in page.get_text("blocks"):
                    # Only the vertical extent of the bounding box is needed.
                    y0, y1 = block[1], block[3]
                    text = block[4]

                    # The first block of a page has no predecessor, so it
                    # never triggers a paragraph break.
                    if previous_bottom is None:
                        vertical_spacing = 0
                    else:
                        vertical_spacing = y0 - previous_bottom

                    if vertical_spacing > spacing_threshold:
                        if current_paragraph:
                            paragraphs.append(current_paragraph.strip())
                        current_paragraph = text
                    else:
                        current_paragraph += " " + text

                    previous_bottom = y1

                # Flush the last paragraph of the page.
                if current_paragraph:
                    paragraphs.append(current_paragraph.strip())
    except Exception as e:
        print(f"Erreur lors de l'extraction du PDF : {str(e)}")

    return paragraphs
79
-
80
st.title("PDF2SLIDE")

uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])

if uploaded_file is not None:
    pdf_data = uploaded_file.read()
    paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)

    # Create a PowerPoint presentation
    prs = Presentation()

    min_summary_length = 10
    image_size = Inches(3)

    for paragraph in paragraphs:
        # Nothing to summarize in a whitespace-only paragraph.
        if not paragraph.strip():
            continue

        # Roughly one third of the paragraph length, but never below
        # min_length: short paragraphs would otherwise request a max_length
        # smaller than min_length.
        max_summary_length = max(min_summary_length, round(len(paragraph) / 3))
        summary = summarizer(
            paragraph,
            max_length=max_summary_length,
            min_length=min_summary_length,
            do_sample=False,
        )
        # Skip the slide when no summary came back instead of continuing
        # with an undefined (or stale) summary_text.
        if not summary or "summary_text" not in summary[0]:
            continue

        raw_summary = summary[0]["summary_text"]
        summary_text = add_line_breaks_to_summary(raw_summary, 80)

        # Generate the illustration for this slide.
        image_bytes = query({"inputs": "A picture without text about: " + raw_summary})

        # Create a slide
        slide = prs.slides.add_slide(prs.slide_layouts[5])

        # Add the image, centered horizontally, with a 0.5-inch margin at the
        # bottom. The picture is passed as an in-memory stream so no
        # temporary file is left behind on disk.
        left = (prs.slide_width - image_size) // 2
        top = prs.slide_height - image_size - Inches(0.5)
        slide.shapes.add_picture(io.BytesIO(image_bytes), left, top, image_size, image_size)

        # Add the summary to the slide at the top
        text_box = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(8), Inches(2))
        p = text_box.text_frame.add_paragraph()
        p.text = summary_text
        p.space_after = Pt(0)

    # Serialize the presentation in memory: this avoids a leaked temporary
    # file and an unclosed file handle passed to the download button.
    presentation_buffer = io.BytesIO()
    prs.save(presentation_buffer)

    # Display a download button for the PowerPoint file
    st.download_button(
        label="Download PowerPoint Presentation",
        data=presentation_buffer.getvalue(),
        key="download_ppt",
        file_name="PDF2SLIDE_Presentation.pptx",
    )