rag update
Browse files- __pycache__/app.cpython-314.pyc +0 -0
- app.py +3 -12
- service/__pycache__/data_loader_service.cpython-314.pyc +0 -0
- service/__pycache__/embedded_service.cpython-314.pyc +0 -0
- service/__pycache__/llm_service.cpython-314.pyc +0 -0
- service/__pycache__/memory_service.cpython-314.pyc +0 -0
- service/__pycache__/rag_service.cpython-314.pyc +0 -0
- service/__pycache__/vector_store_service.cpython-314.pyc +0 -0
- service/data_loader_service.py +22 -0
- service/embedded_service.py +8 -0
- service/llm_service.py +26 -0
- service/memory_service.py +7 -0
- service/rag_service.py +43 -0
- service/vector_store_service.py +16 -0
__pycache__/app.cpython-314.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-314.pyc and b/__pycache__/app.cpython-314.pyc differ
|
|
|
app.py
CHANGED
|
@@ -11,8 +11,9 @@ from models import (
|
|
| 11 |
UrgencyPrediction
|
| 12 |
)
|
| 13 |
from multi_task_model_class import MultiTaskModel
|
| 14 |
-
|
| 15 |
from memory import get_conversation, add_message
|
|
|
|
| 16 |
|
| 17 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 18 |
|
|
@@ -127,15 +128,6 @@ def classify_text(text: str, threshold: float = 0.5):
|
|
| 127 |
return categories, urgency
|
| 128 |
|
| 129 |
|
| 130 |
-
def retrieve_documents(query: str):
|
| 131 |
-
"""
|
| 132 |
-
Retrieve relevant documents for RAG.
|
| 133 |
-
"""
|
| 134 |
-
return [
|
| 135 |
-
"Restarting the router fixes most connectivity issues.",
|
| 136 |
-
"Check for planned ISP maintenance.",
|
| 137 |
-
"Verify cables are securely connected."
|
| 138 |
-
]
|
| 139 |
|
| 140 |
|
| 141 |
@app.get("/")
|
|
@@ -160,8 +152,7 @@ def query_endpoint(req: QueryRequest):
|
|
| 160 |
categories, urgency = classify_text(req.query)
|
| 161 |
|
| 162 |
# RAG
|
| 163 |
-
|
| 164 |
-
answer = generate_answer(req.query, docs, history)
|
| 165 |
|
| 166 |
# Update conversation memory
|
| 167 |
add_message(req.user_id, "user", req.query)
|
|
|
|
| 11 |
UrgencyPrediction
|
| 12 |
)
|
| 13 |
from multi_task_model_class import MultiTaskModel
|
| 14 |
+
|
| 15 |
from memory import get_conversation, add_message
|
| 16 |
+
from service.rag_service import generate_answer
|
| 17 |
|
| 18 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 19 |
|
|
|
|
| 128 |
return categories, urgency
|
| 129 |
|
| 130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
|
| 133 |
@app.get("/")
|
|
|
|
| 152 |
categories, urgency = classify_text(req.query)
|
| 153 |
|
| 154 |
# RAG
|
| 155 |
+
answer = generate_answer(req.query,history)
|
|
|
|
| 156 |
|
| 157 |
# Update conversation memory
|
| 158 |
add_message(req.user_id, "user", req.query)
|
service/__pycache__/data_loader_service.cpython-314.pyc
ADDED
|
Binary file (2 kB). View file
|
|
|
service/__pycache__/embedded_service.cpython-314.pyc
ADDED
|
Binary file (1.21 kB). View file
|
|
|
service/__pycache__/llm_service.cpython-314.pyc
ADDED
|
Binary file (1.93 kB). View file
|
|
|
service/__pycache__/memory_service.cpython-314.pyc
ADDED
|
Binary file (892 Bytes). View file
|
|
|
service/__pycache__/rag_service.cpython-314.pyc
ADDED
|
Binary file (1.87 kB). View file
|
|
|
service/__pycache__/vector_store_service.cpython-314.pyc
ADDED
|
Binary file (1.71 kB). View file
|
|
|
service/data_loader_service.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import csv
from pathlib import Path


class CSVDataLoader:
    """Load question/answer pairs from a CSV file into RAG document strings."""

    def __init__(self, file_path: str):
        self.file_path = Path(file_path)

    def load_qa_pairs(self) -> list[str]:
        """Return one "Question: ...\\nAnswer: ..." string per complete row.

        Rows missing either the question or the answer are skipped.
        Short rows are handled safely: csv.DictReader fills absent
        trailing fields with None (restval), which the original code
        would have passed straight to .strip() and crashed on.
        """
        documents: list[str] = []

        with open(self.file_path, newline="", encoding="utf-8") as f:
            reader = csv.DictReader(f)

            for row in reader:
                # "or ''" guards against None values from short rows.
                question = (row.get("question") or "").strip()
                answer = (row.get("answer") or "").strip()

                if question and answer:
                    documents.append(f"Question: {question}\nAnswer: {answer}")

        return documents
service/embedded_service.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from sentence_transformers import SentenceTransformer

# Small, fast general-purpose sentence encoder (384-dim output).
_MODEL_NAME = "all-MiniLM-L6-v2"


class EmbeddingService:
    """Thin wrapper around a SentenceTransformer model for text embedding."""

    def __init__(self):
        self.model = SentenceTransformer(_MODEL_NAME)

    def embed(self, texts: list[str]):
        """Encode `texts`, returning L2-normalized embedding vectors."""
        return self.model.encode(texts, normalize_embeddings=True)
service/llm_service.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


class LLMService:
    """Wraps a local causal LM (TinyLlama chat) for prompt-in/text-out use."""

    def __init__(self):
        model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,  # halves memory; fine for inference
            device_map="auto",          # place on GPU when available
        )

    def generate(self, prompt: str) -> str:
        """Generate a completion for `prompt` and return ONLY the new text.

        Fix: the original decoded output[0] in full, so every answer
        echoed the entire prompt (system/context/question) back to the
        caller. We now slice off the prompt tokens before decoding.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        with torch.no_grad():  # inference only; skip autograd bookkeeping
            output = self.model.generate(
                **inputs,
                max_new_tokens=200,
                temperature=0.3,
                top_p=0.9,
                do_sample=True,
            )

        # Decode only the tokens generated after the prompt.
        prompt_len = inputs["input_ids"].shape[1]
        return self.tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
service/memory_service.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# In-process, per-session conversation summaries.
# Not persistent and not shared across workers: a restart clears it.
memory_store: dict[str, str] = {}


def get_memory(session_id: str) -> str:
    """Return the stored summary for `session_id`, or "" if none exists."""
    try:
        return memory_store[session_id]
    except KeyError:
        return ""


def save_memory(session_id: str, summary: str):
    """Store (or overwrite) the summary kept for `session_id`."""
    memory_store[session_id] = summary
service/rag_service.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from service.embedded_service import EmbeddingService
from service.llm_service import LLMService
from service.memory_service import get_memory
from service.vector_store_service import VectorStoreService
from service.data_loader_service import CSVDataLoader

# Heavy initialization happens ONCE, at import time: embedding model,
# LLM, and the vector index over the CSV knowledge base.
embedder = EmbeddingService()
llm = LLMService()

# Load Q/A documents from the bundled CSV knowledge base.
loader = CSVDataLoader("final_data_set(in).csv")
documents = loader.load_qa_pairs()

doc_embeddings = embedder.embed(documents)
vector_store = VectorStoreService(doc_embeddings, documents)


def generate_answer(question: str, session_id: str | None):
    """Answer `question` with RAG: retrieve top-3 context chunks, prepend
    any conversation memory for the session, and prompt the LLM.

    NOTE(review): app.py appears to call this as
    generate_answer(req.query, history) — passing the conversation
    *history* where this signature expects a session id for
    get_memory(). Confirm the intended contract with the caller.
    """
    query_embedding = embedder.embed([question])[0]
    context_chunks = vector_store.search(query_embedding, k=3)

    memory = get_memory(session_id) if session_id else ""

    # Join outside the f-string: backslashes are not allowed inside
    # f-string expressions before Python 3.12, which is why the original
    # resorted to chr(10). The resulting prompt text is identical.
    context = "\n".join(context_chunks)

    prompt = f"""
<|system|>
You are a helpful assistant. Answer ONLY using the provided context.
If the answer is not in the context, say "I don't know".

Conversation memory:
{memory}

<|user|>
Context:
{context}

Question:
{question}

<|assistant|>
"""

    return llm.generate(prompt)
service/vector_store_service.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import faiss
import numpy as np


class VectorStoreService:
    """In-memory FAISS exact-L2 index over a fixed list of documents."""

    def __init__(self, embeddings: list, documents: list):
        self.documents = documents
        dim = len(embeddings[0])

        self.index = faiss.IndexFlatL2(dim)
        # FAISS requires contiguous float32 input; cast defensively so
        # plain Python lists (float64 by default) don't blow up.
        self.index.add(np.asarray(embeddings, dtype=np.float32))

    def search(self, query_embedding, k=3):
        """Return up to `k` documents nearest to `query_embedding`."""
        _, indices = self.index.search(
            np.asarray([query_embedding], dtype=np.float32), k
        )
        # When k exceeds the corpus size, FAISS pads results with -1;
        # the original would then index documents[-1] and silently
        # return the LAST document. Drop the padding instead.
        return [self.documents[i] for i in indices[0] if i >= 0]