"""Minimal RAG demo: retrieve the best-matching passage from a JSON vector
store by cosine similarity, then answer with Mistral 7B via transformers."""
import json

import numpy as np
import streamlit as st
from sentence_transformers import SentenceTransformer
# BUG FIX: Mistral 7B is a decoder-only (causal) model; AutoModelForSeq2SeqLM
# cannot load it. AutoModelForCausalLM is the correct auto class.
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): "mistral7b" is not a valid Hugging Face Hub id; this is the
# official checkpoint name -- confirm it matches the checkpoint you intend.
MODEL_NAME = "mistralai/Mistral-7B-v0.1"


# st.cache(allow_output_mutation=True) is deprecated/removed; cache_resource
# is the modern cache for unhashable singletons such as models.
@st.cache_resource
def load_model():
    """Load the Mistral tokenizer and causal LM once per server session.

    Returns:
        tuple: (tokenizer, model) ready for generation.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    return tokenizer, model


@st.cache_resource
def load_sentence_transformer():
    """Load the sentence-embedding model used to vectorize user queries."""
    return SentenceTransformer('all-MiniLM-L6-v2')


# Plain JSON is hashable/copyable data, so cache_data is the right cache here.
@st.cache_data
def load_vectorstore():
    """Read the precomputed records (each with 'text' and 'embedding') from disk.

    Returns:
        list[dict]: the deserialized vector store.
    """
    with open('vectorstore.json', 'r', encoding='utf-8') as f:
        return json.load(f)


def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    Returns 0.0 when either vector has zero norm, avoiding a division by
    zero that the original code would hit on degenerate embeddings.
    """
    denom = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denom == 0.0:
        return 0.0
    return float(np.dot(vec1, vec2) / denom)


tokenizer, model = load_model()
sentence_transformer = load_sentence_transformer()
vectorstore = load_vectorstore()

# --- Streamlit UI ----------------------------------------------------------
st.title("Simple RAG App with Mistral 7B")

query = st.text_input("Enter your question:")

if st.button("Get Answer"):
    if query:
        # Embed the query and retrieve the single closest stored passage.
        query_embedding = sentence_transformer.encode(query)
        best_match = max(
            vectorstore,
            key=lambda item: cosine_similarity(query_embedding, item['embedding']),
        )

        # Condition generation on query + retrieved context.
        inputs = tokenizer.encode(query + " " + best_match['text'], return_tensors='pt')
        # BUG FIX: max_length counts the PROMPT tokens too, so a prompt longer
        # than 50 tokens yields an empty/truncated answer or an error.
        # max_new_tokens bounds only the generated continuation.
        outputs = model.generate(inputs, max_new_tokens=50, num_return_sequences=1)

        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write("**Answer:**", answer)
    else:
        st.write("Please enter a question.")