Spaces:
Configuration error
Configuration error
| from sentence_transformers import SentenceTransformer | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
| import faiss | |
| import numpy as np | |
# Sentence-embedding model used to turn text into vectors.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Causal language model and its tokenizer, both loaded from the same checkpoint.
# The weights are requested in half precision and device placement is left to
# the library via device_map="auto".
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

# System instruction text (optional).
SYSTEM_PROMPT = "You are an AI assistant helping users understand documents."
| # Load FAISS index and documents | |
def load_faiss_index(index_path="vector_index.faiss", documents_path="documents.npy"):
    """Load the FAISS index and the document array that accompanies it.

    Args:
        index_path: Path of the serialized FAISS index. Defaults to
            "vector_index.faiss" in the current working directory.
        documents_path: Path of the ``.npy`` file holding the documents.
            Defaults to "documents.npy" in the current working directory.

    Returns:
        A ``(index, documents)`` tuple: the object returned by
        ``faiss.read_index`` and the array returned by ``np.load``.
    """
    index = faiss.read_index(index_path)
    # SECURITY: allow_pickle=True makes np.load unpickle object arrays, which
    # can execute arbitrary code. Only load document files from a trusted
    # source; prefer a pickle-free format if the file can come from outside.
    with open(documents_path, "rb") as f:
        documents = np.load(f, allow_pickle=True)
    return index, documents
| # Embed the user query | |
def embed_query(query):
    """Return the embedding of a single query.

    The query is handed to the embedding model as a one-item batch and the
    first (only) entry of the result is returned.
    """
    batch_embeddings = embedding_model.encode([query])
    return batch_embeddings[0]
| # Retrieve top-k relevant documents | |
def retrieve_top_k_docs(query_embedding, index, documents, k=3):
    """Return the documents closest to a query embedding.

    Args:
        query_embedding: The query vector, as a flat sequence/array of
            length ``d`` (a ``(1, d)`` array is accepted as well).
        index: Object exposing ``search(queries, k)`` that returns a
            ``(scores, indices)`` pair, e.g. a FAISS index.
        documents: Sequence indexed by the ids stored in ``index``.
        k: Maximum number of documents to return.

    Returns:
        A list of at most ``k`` documents, in the order reported by the index.
    """
    # The index is queried with a float32 matrix holding one query per row.
    query_matrix = np.ascontiguousarray(query_embedding, dtype="float32").reshape(1, -1)
    _, indices = index.search(query_matrix, k)
    # FAISS pads the id row with -1 when it finds fewer than k neighbours.
    # Indexing documents with -1 would silently return the *last* document
    # as a bogus hit, so negative ids are dropped.
    return [documents[i] for i in indices[0] if i >= 0]
| # Generate the final answer | |
def generate_answer(context_docs, user_query):
    """Generate an answer to ``user_query`` grounded in ``context_docs``.

    Args:
        context_docs: Iterable of strings; they are joined with newlines and
            inserted into the prompt as the context.
        user_query: The user's question, inserted into the prompt verbatim.

    Returns:
        The text generated by the model, stripped of surrounding whitespace.
        The prompt itself is not part of the returned text.
    """
    context = "\n".join(context_docs)
    prompt = f"<s>[INST] {SYSTEM_PROMPT}\n\nContext:\n{context}\n\nQuestion: {user_query} [/INST]"
    # The prompt already spells out the "<s>" BOS marker, so the tokenizer is
    # told not to add its own special tokens; otherwise BOS would be duplicated.
    inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
    output = model.generate(**inputs, max_new_tokens=500, do_sample=True)
    # The generated sequence starts with the prompt tokens. Decode only what
    # follows them instead of splitting the decoded text on "[/INST]", which
    # would cut the answer short if the model emitted that marker itself.
    prompt_length = inputs["input_ids"].shape[-1]
    answer = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    return answer.strip()