Spaces:
Runtime error
Runtime error
semantic search explain
Browse files
- __pycache__/utils.cpython-310.pyc +0 -0
- app.py +12 -3
- utils.py +3 -1
__pycache__/utils.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/utils.cpython-310.pyc and b/__pycache__/utils.cpython-310.pyc differ
|
|
|
app.py
CHANGED
|
@@ -56,15 +56,24 @@ with st.sidebar:
|
|
| 56 |
text = text_to_docs(doc)
|
| 57 |
try:
|
| 58 |
with st.spinner("Indexing document... This may take a while⏳"):
|
| 59 |
-
|
|
|
|
|
|
|
| 60 |
st.session_state["api_key_configured"] = True
|
| 61 |
except OpenAIError as e:
|
| 62 |
st.error(e._message)
|
| 63 |
|
| 64 |
tab1, tab2 = st.tabs(["Intro", "Semantic Search"])
|
| 65 |
with tab1:
|
| 66 |
-
st.markdown("### Semantic Search")
|
| 67 |
-
st.write("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
st.markdown("#### Code GPT")
|
| 69 |
st.write("All code was written with the help of Code GPT. Visit [codegpt.co]('https://codegpt.co') to get the extension.")
|
| 70 |
st.markdown("#### Streamlit")
|
|
|
|
| 56 |
text = text_to_docs(doc)
|
| 57 |
try:
|
| 58 |
with st.spinner("Indexing document... This may take a while⏳"):
|
| 59 |
+
result = embed_docs(text)
|
| 60 |
+
index = result[0]
|
| 61 |
+
embeddings = result[1]
|
| 62 |
st.session_state["api_key_configured"] = True
|
| 63 |
except OpenAIError as e:
|
| 64 |
st.error(e._message)
|
| 65 |
|
| 66 |
tab1, tab2 = st.tabs(["Intro", "Semantic Search"])
|
| 67 |
with tab1:
|
| 68 |
+
st.markdown("### Semantic Search with cosine similarity")
|
| 69 |
+
st.write("Cosine similarity is a technique used to measure the similarity between two vectors. In the context of OpenAI's embedding API, cosine similarity is used to compare the similarity between two pieces of text based on their underlying vector representations.")
|
| 70 |
+
st.write("Here's how it works:")
|
| 71 |
+
st.write("1.First, the embedding API converts each piece of text into a vector representation using a pre-trained language model. This vector represents the meaning and context of the text.")
|
| 72 |
+
st.write("2.The cosine similarity function then takes these two vectors and calculates the cosine of the angle between them. The cosine similarity score ranges from -1 to 1, where 1 indicates that the two vectors are identical, 0 indicates that they are completely dissimilar, and -1 indicates that they are exact opposites.")
|
| 73 |
+
st.write("3.This cosine similarity score is then used to determine the similarity between the two pieces of text. For example, if the cosine similarity score is close to 1, the two pieces of text are likely very similar in meaning, while a score close to 0 suggests that they are completely different.")
|
| 74 |
+
st.write("Overall, cosine similarity is a powerful tool for comparing the semantic similarity between two pieces of text, and OpenAI's embedding API makes it easy to implement this technique in your own projects.")
|
| 75 |
+
st.markdown("""---""")
|
| 76 |
+
st.markdown("## Semantic Search was written with the following tools:")
|
| 77 |
st.markdown("#### Code GPT")
|
| 78 |
st.write("All code was written with the help of Code GPT. Visit [codegpt.co]('https://codegpt.co') to get the extension.")
|
| 79 |
st.markdown("#### Streamlit")
|
utils.py
CHANGED
|
@@ -114,7 +114,9 @@ def embed_docs(docs: List[Document]) -> VectorStore:
|
|
| 114 |
embeddings = OpenAIEmbeddings(openai_api_key=st.session_state.get("OPENAI_API_KEY")) # type: ignore
|
| 115 |
index = FAISS.from_documents(docs, embeddings)
|
| 116 |
|
| 117 |
-
|
|
|
|
|
|
|
| 118 |
|
| 119 |
|
| 120 |
@st.cache(allow_output_mutation=True)
|
|
|
|
| 114 |
embeddings = OpenAIEmbeddings(openai_api_key=st.session_state.get("OPENAI_API_KEY")) # type: ignore
|
| 115 |
index = FAISS.from_documents(docs, embeddings)
|
| 116 |
|
| 117 |
+
# build a list that holds both the index and the embeddings
|
| 118 |
+
result = [index, embeddings]
|
| 119 |
+
return result
|
| 120 |
|
| 121 |
|
| 122 |
@st.cache(allow_output_mutation=True)
|