Spaces:
Running
Running
Commit
·
6c26e5d
1
Parent(s):
ec7206a
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,27 +24,15 @@ def extract_abstract(pdf_bytes):
|
|
| 24 |
|
| 25 |
# Function to process text (summarize and convert to speech)
|
| 26 |
def process_text(uploaded_file):
|
| 27 |
-
#
|
| 28 |
-
print(f"Uploaded file type: {type(uploaded_file)}")
|
| 29 |
-
if isinstance(uploaded_file, dict):
|
| 30 |
-
print("Uploaded file is a dictionary.")
|
| 31 |
-
print(f"Keys available: {uploaded_file.keys()}")
|
| 32 |
-
|
| 33 |
-
# Assuming uploaded_file is a dictionary and contains 'data' key
|
| 34 |
try:
|
| 35 |
-
pdf_bytes = uploaded_file
|
| 36 |
-
except
|
| 37 |
-
print("Error
|
| 38 |
return "File content could not be retrieved", None
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
|
| 42 |
-
# Extract the file data (byte content) from the uploaded file
|
| 43 |
-
# Check if 'content' key exists, and use it to access the file's byte content
|
| 44 |
-
# if "content" in uploaded_file:
|
| 45 |
-
# pdf_bytes = uploaded_file["data"]
|
| 46 |
-
# else:
|
| 47 |
-
# return "File content could not be retrieved", None
|
| 48 |
|
| 49 |
# Generate summary
|
| 50 |
inputs = tokenizer([abstract_text], max_length=1024, return_tensors='pt', truncation=True)
|
|
|
|
| 24 |
|
| 25 |
# Function to process text (summarize and convert to speech)
|
| 26 |
def process_text(uploaded_file):
|
| 27 |
+
# Attempt to extract byte content from NamedString object
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
try:
|
| 29 |
+
pdf_bytes = uploaded_file.file.read()
|
| 30 |
+
except AttributeError:
|
| 31 |
+
print("Error reading file content from uploaded_file")
|
| 32 |
return "File content could not be retrieved", None
|
| 33 |
|
| 34 |
+
# Extract abstract from PDF
|
| 35 |
+
abstract_text = extract_abstract(pdf_bytes)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
# Generate summary
|
| 38 |
inputs = tokenizer([abstract_text], max_length=1024, return_tensors='pt', truncation=True)
|