Spaces:
Build error
Build error
Your Name committed on
Commit ·
dbaf019
1
Parent(s): 3055700
Update app.py and requirements.txt with PDF extraction support
Browse files
app.py
CHANGED
|
@@ -29,15 +29,14 @@ dataset = load_dataset("dgmos/ericsson-manuals", split="train")
|
|
| 29 |
# 4. PDF 텍스트 추출
|
| 30 |
docs = []
|
| 31 |
for item in dataset:
|
| 32 |
-
# 실제 필드명 확인 (예: "file", "path")
|
| 33 |
-
pdf_path = item["file"]
|
| 34 |
-
|
| 35 |
-
# PDF 열기 및 텍스트 추출
|
| 36 |
try:
|
| 37 |
with pdfplumber.open(pdf_path) as pdf:
|
| 38 |
text = "
|
| 39 |
".join([page.extract_text() or "" for page in pdf.pages])
|
| 40 |
-
docs.append(text)
|
| 41 |
except Exception as e:
|
| 42 |
print(f"PDF 처리 중 오류: {pdf_path} - {str(e)}")
|
| 43 |
continue
|
|
|
|
| 29 |
# 4. PDF 텍스트 추출
|
| 30 |
docs = []
|
| 31 |
for item in dataset:
|
| 32 |
+
# 실제 필드명 확인 필요 (예: "file", "path")
|
| 33 |
+
pdf_path = item["file"]
|
| 34 |
+
|
|
|
|
| 35 |
try:
|
| 36 |
with pdfplumber.open(pdf_path) as pdf:
|
| 37 |
text = "
|
| 38 |
".join([page.extract_text() or "" for page in pdf.pages])
|
| 39 |
+
docs.append({"page_content": text})
|
| 40 |
except Exception as e:
|
| 41 |
print(f"PDF 처리 중 오류: {pdf_path} - {str(e)}")
|
| 42 |
continue
|