Spaces:
Runtime error
Runtime error
Liam Dyer
committed on
fix metadata
Browse files
app.py
CHANGED
|
@@ -24,7 +24,16 @@ def convert(pdf_file):
|
|
| 24 |
for idx, page in enumerate(reader.pages):
|
| 25 |
full_text += f"\n\n---- Page {idx} ----\n\n" + page.extract_text()
|
| 26 |
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
gr.Interface(
|
|
|
|
| 24 |
for idx, page in enumerate(reader.pages):
|
| 25 |
full_text += f"\n\n---- Page {idx} ----\n\n" + page.extract_text()
|
| 26 |
|
| 27 |
+
# Extract metadata
|
| 28 |
+
metadata = {
|
| 29 |
+
"author": reader.metadata.author,
|
| 30 |
+
"creator": reader.metadata.creator,
|
| 31 |
+
"producer": reader.metadata.producer,
|
| 32 |
+
"subject": reader.metadata.subject,
|
| 33 |
+
"title": reader.metadata.title,
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
return full_text, metadata
|
| 37 |
|
| 38 |
|
| 39 |
gr.Interface(
|