|
|
""" |
|
|
Indexer Module for Module C |
|
|
Ingests templates from the data directory into the Vector DB. |
|
|
""" |
|
|
|
|
|
import logging |
|
|
import sys |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
sys.path.append(str(Path(__file__).parent.parent)) |
|
|
|
|
|
from module_c.config import TEMPLATE_DIR |
|
|
from module_c.template_loader import TemplateLoader |
|
|
from module_c.vector_db import TemplateVectorDB |
|
|
from module_a.embeddings import EmbeddingGenerator |
|
|
|
|
|
|
|
|
# Ask the root logger to emit INFO-and-above records. ``basicConfig`` is a
# no-op when the root logger already has handlers installed.
logging.basicConfig(level=logging.INFO)

# Logger named after this module; used by everything below.
logger = logging.getLogger(__name__)
|
|
|
|
|
def build_index():
    """Load every template, embed it, and store it in the vector DB.

    Steps:
      1. List the templates under ``TEMPLATE_DIR`` and load each one.
      2. Embed "template name + content" for all templates in one batch.
      3. Try to drop and recreate the vector DB collection, then add the
         templates together with their embeddings.

    Returns:
        None. Returns early, after logging a warning, when the loader
        reports no templates.
    """
    logger.info("Starting Template Indexing...")

    loader = TemplateLoader(TEMPLATE_DIR)
    template_files = loader.list_templates()

    if not template_files:
        logger.warning("No templates found to index.")
        return

    templates_data = []
    texts = []

    for filename in template_files:
        content = loader.load_template(filename)
        placeholders = loader.extract_placeholders(content)

        # The embedded text includes the file name so that the name itself
        # contributes to the vector; the stored "text" is the raw content.
        text_for_embedding = f"Template Name: {filename}\nContent:\n{content}"

        templates_data.append({
            "id": filename,
            "text": content,
            "metadata": {
                "filename": filename,
                # Flattened to a single string for the metadata field.
                "placeholders": ", ".join(placeholders),
            },
        })
        texts.append(text_for_embedding)
        logger.info("Loaded: %s", filename)

    logger.info("Generating embeddings...")
    embedder = EmbeddingGenerator()
    embeddings = embedder.generate_embeddings_batch(texts)

    logger.info("Storing in Vector DB...")
    db = TemplateVectorDB()

    # Best-effort reset so a re-run does not pile up on top of a previous
    # index. A failure here is not fatal, but it must not be silent: log it
    # with the traceback and carry on with whatever collection handle the
    # DB wrapper already holds.
    try:
        db.client.delete_collection(db.collection_name)
        db.collection = db.client.create_collection(db.collection_name)
    except Exception:
        logger.warning(
            "Could not reset the template collection; "
            "continuing with the current collection handle.",
            exc_info=True,
        )

    # NOTE(review): assumes ``embeddings`` exposes ``.tolist()`` (e.g. a
    # NumPy array) -- confirm against EmbeddingGenerator.
    db.add_templates(templates_data, embeddings.tolist())

    logger.info("Indexing Complete!")
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: rebuild the template index when run directly.
    build_index()
|
|
|