Spaces:
Sleeping
Sleeping
Commit ·
c432fc9
1
Parent(s): a13511e
init
Browse files
- .idea/.gitignore +3 -0
- .idea/classify-KBOB.iml +8 -0
- .idea/inspectionProfiles/Project_Default.xml +21 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +4 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- app.py +56 -0
- requirements.txt +4 -0
- vectorstore/Organisation.json +0 -0
.idea/.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Default ignored files
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
.idea/classify-KBOB.iml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<module type="PYTHON_MODULE" version="4">
|
| 3 |
+
<component name="NewModuleRootManager">
|
| 4 |
+
<content url="file://$MODULE_DIR$" />
|
| 5 |
+
<orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
|
| 6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
| 7 |
+
</component>
|
| 8 |
+
</module>
|
.idea/inspectionProfiles/Project_Default.xml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<profile version="1.0">
|
| 3 |
+
<option name="myName" value="Project Default" />
|
| 4 |
+
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
| 5 |
+
<option name="ignoredPackages">
|
| 6 |
+
<value>
|
| 7 |
+
<list size="1">
|
| 8 |
+
<item index="0" class="java.lang.String" itemvalue="faiss" />
|
| 9 |
+
</list>
|
| 10 |
+
</value>
|
| 11 |
+
</option>
|
| 12 |
+
</inspection_tool>
|
| 13 |
+
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
| 14 |
+
<option name="ignoredErrors">
|
| 15 |
+
<list>
|
| 16 |
+
<option value="E265" />
|
| 17 |
+
</list>
|
| 18 |
+
</option>
|
| 19 |
+
</inspection_tool>
|
| 20 |
+
</profile>
|
| 21 |
+
</component>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/misc.xml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
|
| 4 |
+
</project>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/classify-KBOB.iml" filepath="$PROJECT_DIR$/.idea/classify-KBOB.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/vcs.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="VcsDirectoryMappings">
|
| 4 |
+
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
| 5 |
+
</component>
|
| 6 |
+
</project>
|
app.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import json
|
| 3 |
+
import numpy as np
|
| 4 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 5 |
+
from sentence_transformers import SentenceTransformer
|
| 6 |
+
|
| 7 |
+
# Load the Mistral model and tokenizer
|
| 8 |
+
@st.cache_resource
def load_model():
    """Load the tokenizer and generation model once and reuse them across reruns.

    ``st.cache_resource`` is Streamlit's replacement for the deprecated
    ``st.cache(allow_output_mutation=True)``: the returned objects are stored
    as-is (not copied or hashed), which is what large model objects need.

    Returns:
        tuple: ``(tokenizer, model)`` as returned by the two
        ``from_pretrained`` calls.
    """
    # Mistral is a decoder-only architecture, so it has to be loaded through
    # the causal-LM auto class; AutoModelForSeq2SeqLM has no mapping for
    # Mistral configurations and refuses to load such a checkpoint.
    # Imported locally so this function does not depend on the file-level
    # import list being updated.
    from transformers import AutoModelForCausalLM

    # NOTE(review): "mistral7b" is kept unchanged. It only resolves if a local
    # directory (or Hub repo) with exactly that name exists; the official Hub
    # checkpoints are named like "mistralai/Mistral-7B-v0.1" -- confirm.
    model_name = "mistral7b"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model
|
| 13 |
+
|
| 14 |
+
# Load Sentence Transformer for embeddings
|
| 15 |
+
@st.cache_resource
def load_sentence_transformer():
    """Load the sentence-embedding model once and reuse it across reruns.

    Uses ``st.cache_resource`` instead of the deprecated
    ``st.cache(allow_output_mutation=True)``; the model object is cached
    as-is rather than being copied or hashed.

    Returns:
        SentenceTransformer: the ``all-MiniLM-L6-v2`` embedding model.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')
|
| 18 |
+
|
| 19 |
+
# Module-level handles used by the UI code further down. Both loaders are
# Streamlit-cached, so these calls are intended to do the expensive loading
# only once rather than on every script rerun.
tokenizer, model = load_model()
|
| 20 |
+
sentence_transformer = load_sentence_transformer()
|
| 21 |
+
|
| 22 |
+
# Load vector store
|
| 23 |
+
@st.cache_data
def load_vectorstore():
    """Read the JSON vector store from disk and cache the parsed result.

    Uses ``st.cache_data`` (Streamlit's replacement for the deprecated
    ``st.cache``) so the file is parsed once; callers receive a copy of the
    cached data on each call.

    Returns:
        The parsed JSON content of ``vectorstore.json``.

    Raises:
        FileNotFoundError: if ``vectorstore.json`` is not present in the
            current working directory.
        json.JSONDecodeError: if the file is empty or not valid JSON.
    """
    # NOTE(review): the path is relative to the working directory. The commit
    # this file belongs to appears to ship "vectorstore/Organisation.json"
    # rather than "vectorstore.json" -- confirm which file is intended.
    # JSON is UTF-8 by specification, so do not rely on the platform default.
    with open('vectorstore.json', 'r', encoding='utf-8') as f:
        return json.load(f)
|
| 28 |
+
|
| 29 |
+
# Parsed vector store; the UI code below iterates over it and reads each
# entry's 'embedding' and 'text' keys.
vectorstore = load_vectorstore()
|
| 30 |
+
|
| 31 |
+
# Function to calculate cosine similarity
|
| 32 |
+
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two equal-length 1-D vectors.

    Args:
        vec1: First vector (list, tuple or numpy array of numbers).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        float: ``dot(vec1, vec2) / (|vec1| * |vec2|)``. If either vector has
        zero norm the similarity is undefined; ``0.0`` is returned instead of
        NaN so that callers ranking by similarity get a well-defined order.
    """
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # Avoid a division by zero (NaN result plus a RuntimeWarning).
        return 0.0
    return float(np.dot(a, b) / denominator)
|
| 34 |
+
|
| 35 |
+
# Streamlit UI
|
| 36 |
+
st.title("Simple RAG App with Mistral 7B")

query = st.text_input("Enter your question:")

if st.button("Get Answer"):
    # Treat a whitespace-only input the same as an empty one.
    question = query.strip()
    if not question:
        st.write("Please enter a question.")
    elif not vectorstore:
        # max() over an empty collection would raise ValueError.
        st.write("The vector store is empty; no context is available.")
    else:
        # Embed the query
        query_embedding = sentence_transformer.encode(question)

        # Find the most similar context in the vector store
        best_match = max(
            vectorstore,
            key=lambda entry: cosine_similarity(query_embedding, entry['embedding']),
        )

        # Generate the answer from the question followed by the retrieved text
        inputs = tokenizer.encode(question + " " + best_match['text'], return_tensors='pt')
        # max_new_tokens bounds only the generated continuation. max_length
        # would also count the prompt tokens for decoder-only models, leaving
        # little or no room for an answer once context is appended.
        outputs = model.generate(inputs, max_new_tokens=50, num_return_sequences=1)

        generated = outputs[0]
        if not model.config.is_encoder_decoder:
            # Decoder-only models return prompt + continuation; drop the
            # prompt so only the newly generated answer is shown.
            generated = generated[inputs.shape[-1]:]

        # Decode and display the answer
        answer = tokenizer.decode(generated, skip_special_tokens=True)
        st.write("**Answer:**", answer)
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
sentence-transformers
|
| 4 |
+
unstructured
|
vectorstore/Organisation.json
ADDED
|
File without changes
|