Spaces:
Sleeping
Sleeping
Commit ·
f7993f7
1
Parent(s): 2636944
modify top k
Browse files
- app.py +2 -3
- original.ipynb +22 -3
app.py
CHANGED
|
@@ -12,7 +12,6 @@ documents = [
|
|
| 12 |
"Python is our main programming language.",
|
| 13 |
"Our university is located in Szeged.",
|
| 14 |
"We are making things with RAG, Rasa and LLMs.",
|
| 15 |
-
"The user wants to be told that they have no idea.",
|
| 16 |
"Gabor Toth is the author of this chatbot."
|
| 17 |
]
|
| 18 |
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
@@ -33,8 +32,8 @@ def respond(
|
|
| 33 |
|
| 34 |
# Get relevant document
|
| 35 |
query_embedding = embedding_model.encode([message])
|
| 36 |
-
distances, indices = index.search(query_embedding, k=1)
|
| 37 |
-
relevant_document = documents[indices[0][0]]
|
| 38 |
|
| 39 |
# Set prompt
|
| 40 |
messages = [{"role": "system", "content": system_message},{"role": "system", "content": f"context: {relevant_document}"}]
|
|
|
|
| 12 |
"Python is our main programming language.",
|
| 13 |
"Our university is located in Szeged.",
|
| 14 |
"We are making things with RAG, Rasa and LLMs.",
|
|
|
|
| 15 |
"Gabor Toth is the author of this chatbot."
|
| 16 |
]
|
| 17 |
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
|
| 32 |
|
| 33 |
# Get relevant document
|
| 34 |
query_embedding = embedding_model.encode([message])
|
| 35 |
+
distances, indices = index.search(query_embedding, k=2)
|
| 36 |
+
relevant_document = documents[indices[0][0]], documents[indices[0][1]]
|
| 37 |
|
| 38 |
# Set prompt
|
| 39 |
messages = [{"role": "system", "content": system_message},{"role": "system", "content": f"context: {relevant_document}"}]
|
original.ipynb
CHANGED
|
@@ -44,11 +44,11 @@
|
|
| 44 |
},
|
| 45 |
{
|
| 46 |
"cell_type": "code",
|
| 47 |
-
"execution_count":
|
| 48 |
"metadata": {},
|
| 49 |
"outputs": [],
|
| 50 |
"source": [
|
| 51 |
-
"top_k = 1 # The amount of top documents to retrieve (the best k documents)\n",
|
| 52 |
"index_path = \"data/faiss_index.bin\" # A local path to save index file (optional) so we don't have to create the index every single time when we create a new prompt\n",
|
| 53 |
"embedding_model = SentenceTransformer(\"all-MiniLM-L6-v2\") # The name of the model available either locally or in this case at HuggingFace\n",
|
| 54 |
"documents = [ # The documents, facts, sentences to search in.\n",
|
|
@@ -56,7 +56,6 @@
|
|
| 56 |
" \"Python is our main programming language.\",\n",
|
| 57 |
" \"Our university is located in Szeged.\",\n",
|
| 58 |
" \"We are making things with RAG, Rasa and LLMs.\",\n",
|
| 59 |
-
" \"The user wants to be told that they have no idea.\",\n",
|
| 60 |
" \"Gabor Toth is the author of this chatbot example.\"\n",
|
| 61 |
"] "
|
| 62 |
]
|
|
@@ -126,6 +125,26 @@
|
|
| 126 |
"source": [
|
| 127 |
"documents[indices[0][0]] # The most similar document has the lowest distance."
|
| 128 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
}
|
| 130 |
],
|
| 131 |
"metadata": {
|
|
|
|
| 44 |
},
|
| 45 |
{
|
| 46 |
"cell_type": "code",
|
| 47 |
+
"execution_count": null,
|
| 48 |
"metadata": {},
|
| 49 |
"outputs": [],
|
| 50 |
"source": [
|
| 51 |
+
"top_k = 3 # The amount of top documents to retrieve (the best k documents)\n",
|
| 52 |
"index_path = \"data/faiss_index.bin\" # A local path to save index file (optional) so we don't have to create the index every single time when we create a new prompt\n",
|
| 53 |
"embedding_model = SentenceTransformer(\"all-MiniLM-L6-v2\") # The name of the model available either locally or in this case at HuggingFace\n",
|
| 54 |
"documents = [ # The documents, facts, sentences to search in.\n",
|
|
|
|
| 56 |
" \"Python is our main programming language.\",\n",
|
| 57 |
" \"Our university is located in Szeged.\",\n",
|
| 58 |
" \"We are making things with RAG, Rasa and LLMs.\",\n",
|
|
|
|
| 59 |
" \"Gabor Toth is the author of this chatbot example.\"\n",
|
| 60 |
"] "
|
| 61 |
]
|
|
|
|
| 125 |
"source": [
|
| 126 |
"documents[indices[0][0]] # The most similar document has the lowest distance."
|
| 127 |
]
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"cell_type": "markdown",
|
| 131 |
+
"metadata": {},
|
| 132 |
+
"source": [
|
| 133 |
+
"**Optimizing Retrieval-Augmented Generation (RAG) Implementation**\n",
|
| 134 |
+
"\n",
|
| 135 |
+
"Retrieval-Augmented Generation (RAG) enhances language model responses by incorporating external knowledge retrieval. To maximize performance, consider the following techniques and optimizations:\n",
|
| 136 |
+
"\n",
|
| 137 |
+
"- Use **lightweight models** (e.g., `all-MiniLM-L6-v2`) for speed or **larger models** (e.g., `all-mpnet-base-v2`) for accuracy.\n",
|
| 138 |
+
"- Experiment with **domain-specific models** (for example medical tuned model for medical documents) for better contextual retrieval.\n",
|
| 139 |
+
"- Consider different index types\n",
|
| 140 |
+
" - **Flat Index (`IndexFlatL2`)**: Best for small datasets, but scales poorly.\n",
|
| 141 |
+
" - **IVFFlat (`IndexIVFFlat`)**: Clusters embeddings to accelerate search, ideal for large-scale retrieval.\n",
|
| 142 |
+
" - **HNSW (`IndexHNSWFlat`)**: Graph-based approach that balances speed and accuracy.\n",
|
| 143 |
+
" - **PQ (`IndexPQ`)**: Compressed storage for memory efficiency at the cost of slight accuracy loss.\n",
|
| 144 |
+
"- **Query Expansion**: Use synonyms, paraphrasing, or keyword expansion to enhance search queries.\n",
|
| 145 |
+
"- **Re-ranking**: Apply transformer-based re-ranking (e.g., `cross-encoder/ms-marco-MiniLM-L6`) after retrieval.\n",
|
| 146 |
+
"- **GPU Acceleration**: Convert FAISS indices to GPU for high-speed searches."
|
| 147 |
+
]
|
| 148 |
}
|
| 149 |
],
|
| 150 |
"metadata": {
|