Sandei committed on
Commit
a5d886c
·
1 Parent(s): 9c60f47

rag update

Browse files
__pycache__/app.cpython-314.pyc CHANGED
Binary files a/__pycache__/app.cpython-314.pyc and b/__pycache__/app.cpython-314.pyc differ
 
app.py CHANGED
@@ -11,8 +11,9 @@ from models import (
11
  UrgencyPrediction
12
  )
13
  from multi_task_model_class import MultiTaskModel
14
- from rag import generate_answer
15
  from memory import get_conversation, add_message
 
16
 
17
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
18
 
@@ -127,15 +128,6 @@ def classify_text(text: str, threshold: float = 0.5):
127
  return categories, urgency
128
 
129
 
130
- def retrieve_documents(query: str):
131
- """
132
- Retrieve relevant documents for RAG.
133
- """
134
- return [
135
- "Restarting the router fixes most connectivity issues.",
136
- "Check for planned ISP maintenance.",
137
- "Verify cables are securely connected."
138
- ]
139
 
140
 
141
  @app.get("/")
@@ -160,8 +152,7 @@ def query_endpoint(req: QueryRequest):
160
  categories, urgency = classify_text(req.query)
161
 
162
  # RAG
163
- docs = retrieve_documents(req.query)
164
- answer = generate_answer(req.query, docs, history)
165
 
166
  # Update conversation memory
167
  add_message(req.user_id, "user", req.query)
 
11
  UrgencyPrediction
12
  )
13
  from multi_task_model_class import MultiTaskModel
14
+
15
  from memory import get_conversation, add_message
16
+ from service.rag_service import generate_answer
17
 
18
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
19
 
 
128
  return categories, urgency
129
 
130
 
 
 
 
 
 
 
 
 
 
131
 
132
 
133
  @app.get("/")
 
152
  categories, urgency = classify_text(req.query)
153
 
154
  # RAG
155
+ answer = generate_answer(req.query,history)
 
156
 
157
  # Update conversation memory
158
  add_message(req.user_id, "user", req.query)
service/__pycache__/data_loader_service.cpython-314.pyc ADDED
Binary file (2 kB). View file
 
service/__pycache__/embedded_service.cpython-314.pyc ADDED
Binary file (1.21 kB). View file
 
service/__pycache__/llm_service.cpython-314.pyc ADDED
Binary file (1.93 kB). View file
 
service/__pycache__/memory_service.cpython-314.pyc ADDED
Binary file (892 Bytes). View file
 
service/__pycache__/rag_service.cpython-314.pyc ADDED
Binary file (1.87 kB). View file
 
service/__pycache__/vector_store_service.cpython-314.pyc ADDED
Binary file (1.71 kB). View file
 
service/data_loader_service.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import csv
from pathlib import Path


class CSVDataLoader:
    """Loads question/answer pairs from a CSV file and formats them as RAG documents."""

    def __init__(self, file_path: str):
        """
        Args:
            file_path: Path to a CSV file with "question" and "answer" columns.
        """
        self.file_path = Path(file_path)

    def load_qa_pairs(self) -> list[str]:
        """Read the CSV and return one formatted document per complete row.

        Rows with a missing or blank question or answer are skipped.

        Returns:
            List of "Question: ...\\nAnswer: ..." strings, in file order.

        Raises:
            FileNotFoundError: If the CSV file does not exist.
        """
        documents: list[str] = []

        with open(self.file_path, newline="", encoding="utf-8") as f:
            reader = csv.DictReader(f)

            for row in reader:
                # Bug fix: DictReader fills missing trailing fields with None
                # (restval), so row.get("answer", "") can still yield None on a
                # short row and .strip() would raise AttributeError. Guard with
                # `or ""` before stripping.
                question = (row.get("question") or "").strip()
                answer = (row.get("answer") or "").strip()

                if question and answer:
                    documents.append(f"Question: {question}\nAnswer: {answer}")

        return documents
service/embedded_service.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
from sentence_transformers import SentenceTransformer


class EmbeddingService:
    """Wraps a sentence-transformers model that produces normalized text embeddings."""

    # Compact general-purpose embedding model.
    _MODEL_NAME = "all-MiniLM-L6-v2"

    def __init__(self):
        self.model = SentenceTransformer(self._MODEL_NAME)

    def embed(self, texts: list[str]):
        """Return unit-length embedding vectors for *texts*.

        Normalized embeddings make L2 distance rank-equivalent to cosine
        similarity in a downstream vector index.
        """
        return self.model.encode(texts, normalize_embeddings=True)
service/llm_service.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


class LLMService:
    """Loads the TinyLlama chat model and generates completions for prompts."""

    def __init__(self):
        model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        # fp16 + device_map="auto" places the model on GPU when available.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            device_map="auto"
        )

    def generate(self, prompt: str) -> str:
        """Generate a completion for *prompt* and return only the new text.

        Args:
            prompt: Fully formatted chat prompt (system/user tags included).

        Returns:
            The model's continuation, without the prompt echoed back.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        output = self.model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.3,
            top_p=0.9,
            do_sample=True
        )

        # Bug fix: for decoder-only models, generate() returns the prompt
        # tokens followed by the continuation. Decoding output[0] in full
        # would leak the entire system prompt and retrieved context into the
        # API response. Slice off the prompt before decoding.
        new_tokens = output[0][inputs["input_ids"].shape[-1]:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
service/memory_service.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# In-process store of per-session conversation summaries.
# Keys are session ids, values are summary strings. Not persisted and not
# shared across processes/workers.
memory_store = {}


def get_memory(session_id: str) -> str:
    """Return the stored summary for *session_id*, or "" if none exists."""
    try:
        return memory_store[session_id]
    except KeyError:
        return ""


def save_memory(session_id: str, summary: str) -> None:
    """Store (overwriting any previous value) the summary for *session_id*."""
    memory_store[session_id] = summary
service/rag_service.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from service.embedded_service import EmbeddingService
from service.llm_service import LLMService
from service.memory_service import get_memory
from service.vector_store_service import VectorStoreService
from service.data_loader_service import CSVDataLoader

# --- One-time pipeline setup (runs at import) --------------------------------
# NOTE(review): embedding model, LLM, and index are all built at import time,
# so server startup pays the full cost once.
embedder = EmbeddingService()
llm = LLMService()

# Build the retrieval corpus from the bundled Q/A CSV.
loader = CSVDataLoader("final_data_set(in).csv")
documents = loader.load_qa_pairs()

doc_embeddings = embedder.embed(documents)
vector_store = VectorStoreService(doc_embeddings, documents)


def generate_answer(question: str, session_id: str | None):
    """Answer *question* with retrieval-augmented generation.

    Retrieves the 3 closest Q/A documents, prepends any stored conversation
    memory for *session_id*, and asks the LLM to answer strictly from that
    context.

    NOTE(review): the caller in app.py appears to pass the conversation
    history as the second argument rather than a session id — confirm this
    matches memory_service's expectations.
    """
    query_vec = embedder.embed([question])[0]
    retrieved = vector_store.search(query_vec, k=3)

    memory = get_memory(session_id) if session_id else ""
    context_text = "\n".join(retrieved)

    prompt = f"""
<|system|>
You are a helpful assistant. Answer ONLY using the provided context.
If the answer is not in the context, say "I don't know".

Conversation memory:
{memory}

<|user|>
Context:
{context_text}

Question:
{question}

<|assistant|>
"""
    return llm.generate(prompt)
service/vector_store_service.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import faiss
import numpy as np


class VectorStoreService:
    """In-memory FAISS (flat L2) index over a fixed document collection."""

    def __init__(self, embeddings: list, documents: list):
        """
        Args:
            embeddings: One embedding vector per document, same order as
                *documents*. Must be non-empty.
            documents: Document texts returned by search().
        """
        self.documents = documents
        dim = len(embeddings[0])

        # Exact (brute-force) L2 search — fine for a small corpus. With
        # normalized embeddings, L2 ranking matches cosine ranking.
        self.index = faiss.IndexFlatL2(dim)
        # FAISS requires float32; coerce in case the embedder returns float64.
        self.index.add(np.asarray(embeddings, dtype="float32"))

    def search(self, query_embedding, k=3):
        """Return up to *k* documents closest to *query_embedding*.

        Args:
            query_embedding: A single embedding vector.
            k: Number of neighbours requested.

        Returns:
            Document strings, nearest first. May be shorter than *k* when the
            index holds fewer documents.
        """
        # Bug fix: FAISS pads results with index -1 when k exceeds the number
        # of stored vectors, and documents[-1] would silently return the LAST
        # document. Clamp k and drop any -1 sentinels.
        k = min(k, len(self.documents))
        if k <= 0:
            return []

        distances, indices = self.index.search(
            np.asarray([query_embedding], dtype="float32"), k
        )
        return [self.documents[i] for i in indices[0] if i >= 0]