elia-waefler committed on
Commit
c432fc9
·
1 Parent(s): a13511e
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/classify-KBOB.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredPackages">
6
+ <value>
7
+ <list size="1">
8
+ <item index="0" class="java.lang.String" itemvalue="faiss" />
9
+ </list>
10
+ </value>
11
+ </option>
12
+ </inspection_tool>
13
+ <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
14
+ <option name="ignoredErrors">
15
+ <list>
16
+ <option value="E265" />
17
+ </list>
18
+ </option>
19
+ </inspection_tool>
20
+ </profile>
21
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/classify-KBOB.iml" filepath="$PROJECT_DIR$/.idea/classify-KBOB.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$" vcs="Git" />
5
+ </component>
6
+ </project>
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json

import numpy as np
import streamlit as st
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
6
+
7
# Load the Mistral model and tokenizer.
# NOTE(review): Mistral-7B is a decoder-only (causal) language model, so
# AutoModelForSeq2SeqLM cannot load it — AutoModelForCausalLM is the correct
# auto-class. "mistral7b" is kept as-is on the assumption it is a local
# checkpoint directory; confirm, since the hosted repo id is
# "mistralai/Mistral-7B-v0.1".
# st.cache(allow_output_mutation=True) is deprecated/removed in current
# Streamlit; st.cache_resource is the replacement for unserializable
# resources such as models.
@st.cache_resource
def load_model():
    """Return (tokenizer, model) for the Mistral checkpoint, cached across reruns."""
    tokenizer = AutoTokenizer.from_pretrained("mistral7b")
    model = AutoModelForCausalLM.from_pretrained("mistral7b")
    return tokenizer, model
13
+
14
# Load Sentence Transformer for embeddings.
# st.cache(allow_output_mutation=True) is deprecated/removed in current
# Streamlit; st.cache_resource is its replacement for caching model objects.
@st.cache_resource
def load_sentence_transformer():
    """Return the cached all-MiniLM-L6-v2 sentence-embedding model."""
    return SentenceTransformer('all-MiniLM-L6-v2')


# Instantiate the heavyweight models once; the cache decorators make these
# cheap on Streamlit reruns.
tokenizer, model = load_model()
sentence_transformer = load_sentence_transformer()
21
+
22
# Load vector store.
# Cached with st.cache_data (the deprecated st.cache is removed in current
# Streamlit): the store is plain JSON — presumably a list of dicts carrying
# 'text' and 'embedding' keys, as the retrieval code below expects; confirm
# against whatever writes the file.
@st.cache_data
def load_vectorstore():
    """Read 'vectorstore.json' from the working directory and return the parsed JSON.

    Raises:
        FileNotFoundError: if the file is absent.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # NOTE(review): the commit adds vectorstore/Organisation.json — verify
    # that the path 'vectorstore.json' is the one the deployed app should use.
    with open('vectorstore.json', 'r', encoding='utf-8') as f:
        vectorstore = json.load(f)
    return vectorstore


vectorstore = load_vectorstore()
30
+
31
# Function to calculate cosine similarity.
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        vec1, vec2: equal-length 1-D array-likes (lists or numpy arrays).

    Returns:
        Similarity in [-1, 1]; 0.0 when either vector has zero norm
        (the unguarded expression divided by zero in that case).
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0.0:
        # Zero vector: similarity is undefined; treat as "no match" rather
        # than emitting nan / a RuntimeWarning.
        return 0.0
    return np.dot(vec1, vec2) / norm_product
34
+
35
# ---- Streamlit UI ----------------------------------------------------------
st.title("Simple RAG App with Mistral 7B")

query = st.text_input("Enter your question:")

if st.button("Get Answer"):
    if not query:
        st.write("Please enter a question.")
    else:
        # Embed the question and retrieve the closest stored passage.
        query_embedding = sentence_transformer.encode(query)

        def _score(entry):
            # Similarity of a stored entry's embedding to the query embedding.
            return cosine_similarity(query_embedding, entry['embedding'])

        best_match = max(vectorstore, key=_score)

        # Condition the generator on the question plus the retrieved context.
        prompt = query + " " + best_match['text']
        inputs = tokenizer.encode(prompt, return_tensors='pt')
        outputs = model.generate(inputs, max_length=50, num_return_sequences=1)

        # Decode the single generated sequence and show it.
        answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write("**Answer:**", answer)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ sentence-transformers
4
+ unstructured
vectorstore/Organisation.json ADDED
File without changes