Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,12 +23,14 @@ db_name = "vector_db"
|
|
| 23 |
# Load environment variables from a file called .env
|
| 24 |
|
| 25 |
load_dotenv(override=True)
|
| 26 |
-
|
| 27 |
|
| 28 |
def process_pdf(pdf_file):
|
| 29 |
loader = PyPDFLoader(pdf_file.name)
|
| 30 |
pages = loader.load()
|
| 31 |
-
|
|
|
|
|
|
|
| 32 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 33 |
chunk_size=500,
|
| 34 |
chunk_overlap=50
|
|
|
|
| 23 |
# Load environment variables from a file called .env
|
| 24 |
|
| 25 |
load_dotenv(override=True)
|
| 26 |
+
|
| 27 |
|
| 28 |
def process_pdf(pdf_file):
|
| 29 |
loader = PyPDFLoader(pdf_file.name)
|
| 30 |
pages = loader.load()
|
| 31 |
+
if pages["page_content"] is None:
|
| 32 |
+
raise ValueError(f"No text chunks generated from {pdf_file}")
|
| 33 |
+
|
| 34 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 35 |
chunk_size=500,
|
| 36 |
chunk_overlap=50
|