try hyde query
Browse files
app.py
CHANGED
|
@@ -1,17 +1,22 @@
|
|
| 1 |
from datasets import load_dataset
|
| 2 |
from llama_index.core import VectorStoreIndex, Document
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
|
| 5 |
# Earlier dataset, kept for reference:
# dataset = load_dataset("davidr70/megillah_english_sugyot", split="train")
dataset = load_dataset("davidr70/megilla_sugyot_merged", split="train")

# Turn every dataset row into a Document, carrying the row's metadata along.
documents = [
    Document(text=row["content"], metadata=row["metadata"])
    for row in dataset
]

# Build the vector index over all documents, then expose it as a retriever.
index = VectorStoreIndex.from_documents(documents)
retriever = index.as_retriever(
    similarity_top_k=7,  # Number of hits to return
    vector_store_query_mode="default",  # Basic semantic search
)
|
| 16 |
|
| 17 |
|
|
|
|
| 1 |
from datasets import load_dataset
|
| 2 |
from llama_index.core import VectorStoreIndex, Document
|
| 3 |
+
from llama_index.core.indices.query.query_transform.base import (
|
| 4 |
+
HyDEQueryTransform,
|
| 5 |
+
)
|
| 6 |
+
from llama_index.core.query_engine import TransformQueryEngine
|
| 7 |
import gradio as gr
|
| 8 |
|
| 9 |
# Earlier dataset, kept for reference:
# dataset = load_dataset("davidr70/megillah_english_sugyot", split="train")
dataset = load_dataset("davidr70/megilla_sugyot_merged", split="train")

# Turn every dataset row into a Document, carrying the row's metadata along.
documents = [
    Document(text=item["content"], metadata=item["metadata"])
    for item in dataset
]

# Query transform applied to each incoming query before retrieval;
# include_original=True asks it to keep the original query as well.
hyde = HyDEQueryTransform(include_original=True)

# Build the vector index over all documents.
index = VectorStoreIndex.from_documents(documents)

# Plain vector retriever. NOTE(review): `as_retriever` has no
# `query_transform` option (per the llama_index retriever API), so passing
# `hyde` there would leave retrieval unchanged; the transform is applied by
# wrapping the retriever below instead.
base_retriever = index.as_retriever(
    similarity_top_k=7,  # Number of hits to return
    vector_store_query_mode="default",  # Basic semantic search
)

# Imported here so this block is self-contained; it can be hoisted to the
# top-of-file import group.
from llama_index.core.retrievers import TransformRetriever

# `retriever` keeps its name and its `.retrieve()` interface, but now runs
# the HyDE transform on every query before delegating to the vector retriever.
retriever = TransformRetriever(base_retriever, hyde)
|
| 21 |
|
| 22 |
|