| import os |
| import json |
| from pathlib import Path |
| from dotenv import load_dotenv |
| from agno.embedder.openai import OpenAIEmbedder |
| from agno.knowledge.pdf import PDFKnowledgeBase, PDFReader |
| from agno.vectordb.qdrant import Qdrant |
| from agno.document.chunking.fixed import FixedSizeChunking |
|
|
| |
# Load variables from a .env file (if any) into the process environment
# before the settings below are read.
load_dotenv()

# Qdrant connection settings; each one is None when its variable is unset.
QDRANT_URL = os.environ.get("QDRANT_URL")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY")
|
|
| |
| |
| |
| |
| |
|
|
# Module-level embedder handed to the Qdrant vector store. It uses the
# OpenAI-style client pointed at the Nebius endpoint, authenticated with
# NEBIUS_API_KEY (None when the variable is unset).
embeddings = OpenAIEmbedder(
    id="BAAI/bge-en-icl",
    dimensions=4096,
    api_key=os.environ.get("NEBIUS_API_KEY"),
    base_url="https://api.studio.nebius.com/v1/",
)
|
|
|
|
class AgnoKnowledgeBase:
    """Load PDFs into a Qdrant collection and expose them as a knowledge base.

    ``agno_kb_config`` must contain a ``"knowledge_base"`` mapping. The keys
    read from it are ``input_data`` (whose ``source`` list holds mappings with
    a ``path`` entry), ``recreate``, ``collection_name``, ``chunk_size``,
    ``overlap``, ``num_documents`` and ``chunking_strategy``.

    ``chunk_size`` and ``num_documents`` passed to the constructor act as
    fallbacks when the configuration does not provide them. ``query``,
    ``user_id`` and ``thread_id`` are stored on the instance but are not read
    by the methods shown here.
    """

    def __init__(self, query: str, user_id: str, thread_id: str, agno_kb_config: dict,
                 chunk_size: int = 1000, num_documents: int = 6):
        self.query = query
        self.user_id = user_id
        self.thread_id = thread_id
        self.agno_kb_config = agno_kb_config
        self.chunk_size = chunk_size
        self.num_documents = num_documents

    @staticmethod
    def _coalesce(value, fallback):
        """Return *value* unless it is None, in which case return *fallback*."""
        return fallback if value is None else value

    @staticmethod
    def _iter_pdf_paths(sources):
        """Yield every PDF path listed under the ``path`` key of each source."""
        for source in sources or []:
            paths = source.get("path") or []
            # A single path must be treated as one entry; iterating a bare
            # string would otherwise yield it character by character.
            if isinstance(paths, (str, Path)):
                paths = [paths]
            yield from paths

    def setup_knowledge_base(self):
        """Ingest the configured PDFs into Qdrant and return a knowledge base.

        Returns:
            A ``PDFKnowledgeBase`` with ``path=None`` bound to the configured
            collection, chunking strategy and ``num_documents``.

        Raises:
            KeyError: if ``agno_kb_config`` has no ``"knowledge_base"`` entry.
            ValueError: if ``chunking_strategy`` is anything other than
                ``"fixed"``.
        """
        print(self.agno_kb_config)
        kb_config = self.agno_kb_config['knowledge_base']

        strategy_name = kb_config.get("chunking_strategy", "fixed")
        if strategy_name != "fixed":
            raise ValueError(f"Unsupported chunking strategy: {strategy_name}")

        # Fall back to the constructor defaults instead of forwarding None
        # when the configuration omits a setting.
        chunk_size = self._coalesce(kb_config.get("chunk_size"), self.chunk_size)
        overlap = self._coalesce(kb_config.get("overlap"), 0)
        num_documents = self._coalesce(
            kb_config.get("num_documents"), self.num_documents
        )
        collection_name = kb_config.get("collection_name")
        input_data = kb_config.get("input_data") or {}
        sources = input_data.get("source") or []

        chunking_strategy = FixedSizeChunking(chunk_size=chunk_size, overlap=overlap)

        vector_db = Qdrant(
            collection=collection_name,
            embedder=embeddings,
            url=QDRANT_URL,
            api_key=QDRANT_API_KEY,
        )

        # Recreate the collection at most once. Passing recreate=True to every
        # load() would drop the collection before each PDF, leaving only the
        # last one in the store.
        recreate_pending = kb_config.get("recreate", False)
        for path in self._iter_pdf_paths(sources):
            print(f"Loading PDF into Qdrant: {path}")
            loader = PDFKnowledgeBase(
                path=path,
                vector_db=vector_db,
                reader=PDFReader(),
                chunking_strategy=chunking_strategy,
                num_documents=num_documents,
            )
            loader.load(recreate=recreate_pending)
            recreate_pending = False

        # The returned knowledge base has no path of its own; it only serves
        # queries against the already-populated collection.
        return PDFKnowledgeBase(
            path=None,
            vector_db=vector_db,
            reader=PDFReader(),
            chunking_strategy=chunking_strategy,
            num_documents=num_documents,
        )
|
|