Spaces:
Sleeping
Sleeping
Commit
·
e2b23b2
1
Parent(s):
feff33b
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,15 +25,30 @@ def get_pdf_text(pdf_docs):
|
|
| 25 |
|
| 26 |
# Assignment
|
| 27 |
# Write the text extraction function below
|
| 28 |
-
def get_text_file(
|
| 29 |
try:
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
|
| 38 |
def get_csv_file(docs):
    """Placeholder for CSV handling; accepts ``docs`` and returns ``None``.

    No processing is implemented yet: the argument is ignored.
    """
    return None
|
|
|
|
| 25 |
|
| 26 |
# Assignment
|
| 27 |
# Write the text extraction function below
|
| 28 |
+
def get_text_file(docs):
    """Return the UTF-8 text of every file in ``docs``, concatenated.

    Args:
        docs: Iterable of binary file-like objects whose ``read()`` returns
            UTF-8 encoded bytes.

    Returns:
        str: The decoded contents of all files joined in iteration order; an
        empty string when ``docs`` is empty.

    Raises:
        UnicodeDecodeError: If a file's content is not valid UTF-8.
    """
    # The previous version decoded each file, re-encoded it into a temporary
    # file on disk, then read that file back and decoded it again. That round
    # trip yields exactly the string built here, so the text is assembled in
    # memory instead. Doing so also removes the dangling ``try:`` (it had no
    # ``except``/``finally`` clause, which is a syntax error) and the
    # temporary file/directory that was left behind whenever reading or
    # decoding raised before the manual cleanup calls ran.
    return "".join(file.read().decode("utf-8") for file in docs)
|
| 51 |
|
|
|
|
| 52 |
|
| 53 |
def get_csv_file(docs):
    """Placeholder for CSV handling; accepts ``docs`` and returns ``None``.

    No processing is implemented yet: the argument is ignored.
    """
    return None
|