# classify-KBOB / app.py
import streamlit as st
import json
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
# Load the Mistral model and tokenizer.
# Mistral 7B is a decoder-only model, so AutoModelForCausalLM is the right
# class (not AutoModelForSeq2SeqLM). The Hub id below is an assumption:
# "mistral7b" is not a valid model id.
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
    model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
    return tokenizer, model
# Load the Sentence Transformer used for query embeddings; st.cache_resource
# replaces the deprecated st.cache(allow_output_mutation=True)
@st.cache_resource
def load_sentence_transformer():
    return SentenceTransformer('all-MiniLM-L6-v2')
tokenizer, model = load_model()
sentence_transformer = load_sentence_transformer()
# Load the vector store (plain JSON on disk); st.cache_data is the current
# API for caching serializable data
@st.cache_data
def load_vectorstore():
    with open('vectorstore.json', 'r') as f:
        vectorstore = json.load(f)
    return vectorstore
vectorstore = load_vectorstore()
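
# Assumed layout of vectorstore.json (inferred from the lookups below; with
# all-MiniLM-L6-v2 each embedding is a 384-dim list of floats):
# [
#   {"text": "a chunk of source text", "embedding": [0.12, -0.03, ...]},
#   ...
# ]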
# Cosine similarity between two vectors (JSON stores plain lists, so coerce
# to numpy arrays first)
def cosine_similarity(vec1, vec2):
    vec1, vec2 = np.asarray(vec1), np.asarray(vec2)
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
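
# For a large store, scoring entry-by-entry in Python is slow. A vectorized
# sketch (illustrative names, not part of this app): stack the embeddings
# once and score every entry with a single matrix product:
#   emb = np.asarray([e['embedding'] for e in vectorstore], dtype=np.float32)
#   emb /= np.linalg.norm(emb, axis=1, keepdims=True)
#   scores = emb @ (query_embedding / np.linalg.norm(query_embedding))
#   best = vectorstore[int(np.argmax(scores))]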
# Streamlit UI
st.title("Simple RAG App with Mistral 7B")

query = st.text_input("Enter your question:")

if st.button("Get Answer"):
    if query:
        # Embed the query
        query_embedding = sentence_transformer.encode(query)

        # Find the most similar context in the vector store
        best_match = max(vectorstore, key=lambda x: cosine_similarity(query_embedding, x['embedding']))

        # Generate an answer with the Mistral model, conditioning on the
        # retrieved context. max_new_tokens bounds only the answer, whereas
        # the original max_length=50 counted the prompt and would usually
        # leave no room to generate.
        inputs = tokenizer.encode(query + " " + best_match['text'], return_tensors='pt')
        outputs = model.generate(inputs, max_new_tokens=100, num_return_sequences=1)

        # Decode only the newly generated tokens, skipping the echoed prompt
        answer = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)
        st.write("**Answer:**", answer)
    else:
        st.write("Please enter a question.")