Spaces:
Sleeping
Sleeping
Commit ·
37d6769
1
Parent(s): 5a41657
added chunk_text
Browse files
app.py
CHANGED
|
@@ -126,7 +126,7 @@ def add_to_vector_index(new_chunks, file_info):
|
|
| 126 |
new_chunk_data.append({
|
| 127 |
'chunk_id': doc_id,
|
| 128 |
'document_id': file_info['file_name'],
|
| 129 |
-
'
|
| 130 |
})
|
| 131 |
|
| 132 |
if chunks_df is not None:
|
|
@@ -145,7 +145,7 @@ def add_to_vector_index(new_chunks, file_info):
|
|
| 145 |
}])
|
| 146 |
documents_df = pd.concat([documents_df, new_doc_info], ignore_index=True)
|
| 147 |
|
| 148 |
-
all_documents = [Document(text=str(row['
|
| 149 |
metadata={"chunk_id": row['chunk_id'], "document_id": row['document_id']})
|
| 150 |
for _, row in chunks_df.iterrows()]
|
| 151 |
|
|
@@ -245,7 +245,7 @@ def initialize_models():
|
|
| 245 |
chunks_df = pd.read_csv(chunks_csv_path)
|
| 246 |
except:
|
| 247 |
log_message("📝 Создание пустой базы чанков...")
|
| 248 |
-
chunks_df = pd.DataFrame(columns=['chunk_id', 'document_id', '
|
| 249 |
|
| 250 |
log_message("🤖 Настройка моделей...")
|
| 251 |
embed_model = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL)
|
|
|
|
| 126 |
new_chunk_data.append({
|
| 127 |
'chunk_id': doc_id,
|
| 128 |
'document_id': file_info['file_name'],
|
| 129 |
+
'chunk_text': chunk
|
| 130 |
})
|
| 131 |
|
| 132 |
if chunks_df is not None:
|
|
|
|
| 145 |
}])
|
| 146 |
documents_df = pd.concat([documents_df, new_doc_info], ignore_index=True)
|
| 147 |
|
| 148 |
+
all_documents = [Document(text=str(row['chunk_text']),
|
| 149 |
metadata={"chunk_id": row['chunk_id'], "document_id": row['document_id']})
|
| 150 |
for _, row in chunks_df.iterrows()]
|
| 151 |
|
|
|
|
| 245 |
chunks_df = pd.read_csv(chunks_csv_path)
|
| 246 |
except:
|
| 247 |
log_message("📝 Создание пустой базы чанков...")
|
| 248 |
+
chunks_df = pd.DataFrame(columns=['chunk_id', 'document_id', 'chunk_text'])
|
| 249 |
|
| 250 |
log_message("🤖 Настройка моделей...")
|
| 251 |
embed_model = HuggingFaceEmbedding(model_name=EMBEDDING_MODEL)
|