Spaces:
Runtime error
Runtime error
| import os | |
| import glob | |
| import uuid | |
| from dotenv import load_dotenv | |
| from backend.app.utils.chunking import semantic_chunking | |
| from backend.app.services.embedding_service import EmbeddingService | |
| from backend.app.services.qdrant_service import QdrantService | |
| load_dotenv() | |
def index_chapters(docs_glob: str = "frontend/docs/chapter-*.md") -> None:
    """Chunk, embed, and store every chapter Markdown file in Qdrant.

    Each file matching ``docs_glob`` is read as UTF-8 and split with
    ``semantic_chunking``. Every chunk is embedded on its own and upserted as
    one point whose payload is the chunk's metadata plus two keys set here:
    ``source`` (the file path) and ``chunk_number`` (the chunk's position
    within its file).

    Point IDs are derived deterministically (UUIDv5) from the file path and
    chunk number, so indexing the same path again targets the same point IDs
    instead of creating a new random ID for every chunk on every run.

    Args:
        docs_glob: Glob pattern selecting the chapter files. A relative
            pattern is resolved against the current working directory.
    """
    print("Starting chapter indexing...")

    # Services are built with their default constructors, as a standalone
    # script has no application context to inject them from.
    embedding_service = EmbeddingService()
    qdrant_service = QdrantService()

    # glob returns matches in arbitrary order; sort for reproducible runs.
    chapter_files = sorted(glob.glob(docs_glob))
    if not chapter_files:
        print(f"No chapter files found matching {docs_glob}. Please ensure chapters exist.")
        return

    for file_path in chapter_files:
        print(f"Processing {file_path}...")
        with open(file_path, "r", encoding="utf-8") as chapter_file:
            markdown_content = chapter_file.read()

        for chunk_number, chunk in enumerate(semantic_chunking(markdown_content)):
            # Build a fresh payload rather than mutating the chunker's dict;
            # "source" is overridden with the real path of the file on disk.
            payload = {
                **chunk["metadata"],
                "source": file_path,
                "chunk_number": chunk_number,
            }

            # Stable ID: the same (file, chunk position) always maps to the
            # same UUID, so a re-run does not mint a new point per chunk.
            point_id = str(
                uuid.uuid5(uuid.NAMESPACE_URL, f"{file_path}#chunk-{chunk_number}")
            )

            # encode() takes a list of texts and returns one vector per text.
            # NOTE(review): encode/upsert could be batched per file once the
            # services' batch-size limits are confirmed.
            embedding = embedding_service.encode([chunk["content"]])[0]
            qdrant_service.upsert_chunks(
                ids=[point_id], vectors=[embedding], payloads=[payload]
            )

    print("Chapter indexing completed.")
# Run the indexer only when this file is executed directly as a script;
# importing the module must not trigger indexing.
if __name__ == "__main__":
    index_chapters()