elia-waefler committed on
Commit
c432fc9
·
1 Parent(s): a13511e
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/classify-KBOB.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="1">
8
+ <item index="0" class="java.lang.String" itemvalue="faiss" />
9
+ </list>
10
+ </value>
11
+ </option>
12
+ </inspection_tool>
13
+ <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
14
+ <option name="ignoredErrors">
15
+ <list>
16
+ <option value="E265" />
17
+ </list>
18
+ </option>
19
+ </inspection_tool>
20
+ </profile>
21
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/classify-KBOB.iml" filepath="$PROJECT_DIR$/.idea/classify-KBOB.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json

import numpy as np
import streamlit as st
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
6
+
7
# Load the Mistral model and tokenizer.
# NOTE(review): Mistral-7B is a decoder-only (causal) language model, so
# AutoModelForSeq2SeqLM cannot load it — AutoModelForCausalLM is the correct
# auto-class. "mistral7b" is kept as-is on the assumption it is a local
# checkpoint directory; confirm, since the hosted repo id is
# "mistralai/Mistral-7B-v0.1".
# st.cache(allow_output_mutation=True) is deprecated/removed in current
# Streamlit; st.cache_resource is the replacement for unserializable
# resources such as models.
@st.cache_resource
def load_model():
    """Return (tokenizer, model) for the Mistral checkpoint, cached across reruns."""
    tokenizer = AutoTokenizer.from_pretrained("mistral7b")
    model = AutoModelForCausalLM.from_pretrained("mistral7b")
    return tokenizer, model
13
+
14
# Load Sentence Transformer for embeddings.
# st.cache(allow_output_mutation=True) is deprecated/removed in current
# Streamlit; st.cache_resource is its replacement for caching model objects.
@st.cache_resource
def load_sentence_transformer():
    """Return the cached all-MiniLM-L6-v2 sentence-embedding model."""
    return SentenceTransformer('all-MiniLM-L6-v2')


# Instantiate the heavyweight models once; the cache decorators make these
# cheap on Streamlit reruns.
tokenizer, model = load_model()
sentence_transformer = load_sentence_transformer()
21
+
22
# Load vector store.
# Cached with st.cache_data (the deprecated st.cache is removed in current
# Streamlit): the store is plain JSON — presumably a list of dicts carrying
# 'text' and 'embedding' keys, as the retrieval code below expects; confirm
# against whatever writes the file.
@st.cache_data
def load_vectorstore():
    """Read 'vectorstore.json' from the working directory and return the parsed JSON.

    Raises:
        FileNotFoundError: if the file is absent.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # NOTE(review): the commit adds vectorstore/Organisation.json — verify
    # that the path 'vectorstore.json' is the one the deployed app should use.
    with open('vectorstore.json', 'r', encoding='utf-8') as f:
        vectorstore = json.load(f)
    return vectorstore


vectorstore = load_vectorstore()
30
+
31
# Function to calculate cosine similarity.
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        vec1, vec2: equal-length 1-D array-likes (lists or numpy arrays).

    Returns:
        Similarity in [-1, 1]; 0.0 when either vector has zero norm
        (the unguarded expression divided by zero in that case).
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0.0:
        # Zero vector: similarity is undefined; treat as "no match" rather
        # than emitting nan / a RuntimeWarning.
        return 0.0
    return np.dot(vec1, vec2) / norm_product
34
+
35
# ---- Streamlit UI ----------------------------------------------------------
st.title("Simple RAG App with Mistral 7B")

query = st.text_input("Enter your question:")

if st.button("Get Answer"):
    if not query:
        st.write("Please enter a question.")
    else:
        # Embed the question and retrieve the closest stored passage.
        query_embedding = sentence_transformer.encode(query)

        def _score(entry):
            # Similarity of a stored entry's embedding to the query embedding.
            return cosine_similarity(query_embedding, entry['embedding'])

        best_match = max(vectorstore, key=_score)

        # Condition the generator on the question plus the retrieved context.
        prompt = query + " " + best_match['text']
        inputs = tokenizer.encode(prompt, return_tensors='pt')
        outputs = model.generate(inputs, max_length=50, num_return_sequences=1)

        # Decode the single generated sequence and show it.
        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write("**Answer:**", answer)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ sentence-transformers
4
+ unstructured
vectorstore/Organisation.json ADDED
File without changes