Spaces:
Running
Running
Commit
·
8328d05
1
Parent(s):
2a7b912
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,14 +12,19 @@ synthesiser = pipeline("text-to-speech", "suno/bark")
|
|
| 12 |
|
| 13 |
# Function to extract abstract from PDF
|
| 14 |
def extract_abstract(pdf_content):
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
first_page = doc[0].get_text()
|
| 17 |
start_idx = first_page.lower().find("abstract")
|
| 18 |
end_idx = first_page.lower().find("introduction")
|
| 19 |
if start_idx != -1 and end_idx != -1:
|
| 20 |
return first_page[start_idx:end_idx].strip()
|
| 21 |
else:
|
| 22 |
-
return "Abstract not found or '
|
| 23 |
|
| 24 |
# Function to process text (summarize and convert to speech)
|
| 25 |
def process_text(pdf_content):
|
|
|
|
| 12 |
|
| 13 |
# Function to extract abstract from PDF
|
| 14 |
def extract_abstract(pdf_content):
    """Return the abstract text found on the first page of a PDF.

    The first page's text is searched case-insensitively for the word
    "abstract" and then for the first "introduction" that follows it. The
    text between the two (including the "abstract" heading itself) is
    returned with surrounding whitespace stripped.

    Args:
        pdf_content: A binary file-like object whose ``read()`` returns the
            raw bytes of a PDF document.

    Returns:
        The extracted abstract as a string, or a fixed "not found" message
        when the document has no pages or when either marker is missing
        from the first page.
    """
    not_found = (
        "Abstract not found or 'Introduction' not found in the first page."
    )

    # Read the whole upload into memory so PyMuPDF can open it as a stream.
    pdf_bytes = pdf_content.read()

    # The context manager closes the document once the first page's text
    # has been copied out, instead of leaving it open until garbage
    # collection.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        if len(doc) == 0:
            # No first page to inspect; report it the same way as a
            # missing marker rather than failing on doc[0].
            return not_found
        first_page = doc[0].get_text()

    lowered = first_page.lower()
    start_idx = lowered.find("abstract")
    if start_idx == -1:
        return not_found

    # Search only after the "abstract" heading: an earlier "introduction"
    # (for example in the paper title) would otherwise give an end index
    # before the start index and an empty result.
    end_idx = lowered.find("introduction", start_idx + len("abstract"))
    if end_idx == -1:
        return not_found

    return first_page[start_idx:end_idx].strip()
|
| 28 |
|
| 29 |
# Function to process text (summarize and convert to speech)
|
| 30 |
def process_text(pdf_content):
|