Update app.py
Browse files
app.py
CHANGED
|
@@ -37,14 +37,24 @@ def load_file(file_obj):
|
|
| 37 |
docs = []
|
| 38 |
text_data = ""
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
# 切块
|
| 50 |
chunks = [text_data[i:i+500] for i in range(0, len(text_data), 500)]
|
|
@@ -56,7 +66,6 @@ def load_file(file_obj):
|
|
| 56 |
index.add(doc_embeddings)
|
| 57 |
|
| 58 |
return f"已加载 {len(docs)} 个文本块", None
|
| 59 |
-
|
| 60 |
# ===== RAG 查询函数 =====
|
| 61 |
def rag_query(query):
|
| 62 |
if index is None:
|
|
|
|
| 37 |
docs = []
|
| 38 |
text_data = ""
|
| 39 |
|
| 40 |
+
ext = os.path.splitext(file_obj.name)[1].lower()
|
| 41 |
+
|
| 42 |
+
try:
|
| 43 |
+
if ext == ".pdf":
|
| 44 |
+
reader = PdfReader(file_obj.name)
|
| 45 |
+
for page in reader.pages:
|
| 46 |
+
page_text = page.extract_text()
|
| 47 |
+
if page_text:
|
| 48 |
+
text_data += page_text + "\n"
|
| 49 |
+
elif ext == ".txt":
|
| 50 |
+
text_data = file_obj.read().decode("utf-8", errors="ignore")
|
| 51 |
+
else:
|
| 52 |
+
return "仅支持 PDF 或 TXT 文件", None
|
| 53 |
+
except Exception as e:
|
| 54 |
+
return f"文件解析失败: {str(e)}", None
|
| 55 |
+
|
| 56 |
+
if not text_data.strip():
|
| 57 |
+
return "未能从文件中提取到文本", None
|
| 58 |
|
| 59 |
# 切块
|
| 60 |
chunks = [text_data[i:i+500] for i in range(0, len(text_data), 500)]
|
|
|
|
| 66 |
index.add(doc_embeddings)
|
| 67 |
|
| 68 |
return f"已加载 {len(docs)} 个文本块", None
|
|
|
|
| 69 |
# ===== RAG 查询函数 =====
|
| 70 |
def rag_query(query):
|
| 71 |
if index is None:
|