| from service.data_loader_service import CSVDataLoader |
| from service.embedded_service import EmbeddingService |
| from service.vector_store_service import VectorStoreService |
| from service.llm_service import LLMService |
| from pathlib import Path |
| import pickle |
|
|
| |
| |
| |
|
|
# Pipeline wiring: load Q&A documents, embed them (with an on-disk cache),
# and build the in-memory vector store used by generate_answer().

# On-disk cache of document embeddings and the CSV it was built from.
CACHE = Path("embeddings.pkl")
DATA_FILE = Path("final_data_set(in).csv")

embedder = EmbeddingService()
llm = LLMService()

loader = CSVDataLoader("final_data_set(in).csv")
documents = loader.load_qa_pairs()

# Reuse the cached embeddings only when the cache exists AND is not older
# than the data file — otherwise a re-edited CSV would silently be served
# stale embeddings (the original code never invalidated the cache).
if CACHE.exists() and CACHE.stat().st_mtime >= DATA_FILE.stat().st_mtime:
    # SECURITY NOTE(review): pickle.load executes arbitrary code from the
    # file; this is acceptable only because embeddings.pkl is produced
    # locally by this script. Never point CACHE at untrusted data.
    with CACHE.open("rb") as f:
        embeddings = pickle.load(f)
else:
    embeddings = embedder.embed(documents)
    with CACHE.open("wb") as f:
        pickle.dump(embeddings, f)

vector_store = VectorStoreService(embeddings, documents)
|
|
| |
| |
| |
|
|
def generate_answer(question: str, k: int = 3, min_similarity: float = 0.65) -> str:
    """Answer *question* via retrieval-augmented generation.

    Embeds the question, retrieves the top *k* documents from the vector
    store, and keeps only those scoring at least *min_similarity*. The
    surviving documents are joined into a prompt for the LLM. If nothing
    clears the threshold, a fixed fallback message is returned instead of
    calling the model.
    """
    # Embed the single question; embed() works on a batch, so index out
    # the one vector we asked for.
    question_vector = embedder.embed([question])[0]

    # (document, score) pairs, best-first, from the vector store.
    scored_hits = vector_store.search_with_scores(question_vector, k=k)

    # Drop anything below the similarity floor to avoid feeding the LLM
    # barely-related context.
    relevant_docs = []
    for document, score in scored_hits:
        if score >= min_similarity:
            relevant_docs.append(document)

    if not relevant_docs:
        # Nothing relevant retrieved — refuse rather than hallucinate.
        return "I’m sorry, I don’t have relevant information in my knowledge base for this query."

    context_block = "\n\n".join(relevant_docs)

    # Prompt instructs the model to stay strictly within the retrieved
    # context; the template text must stay exactly as written.
    prompt = f"""
You are a helpful IT support assistant.

- ONLY answer based on the context below.
- DO NOT hallucinate or invent new steps.
- If the answer is not explicitly present in the context, return the context itself.
- Keep answers concise and step-by-step if possible.

Context:
{context_block}

User question: {question}

Answer:
<|assistant|>
"""

    return llm.generate(prompt)
|
|