Spaces:

dinhquangson
/

QDrantRAG9

Sleeping

App Files Files Community

dinhquangson committed on Jun 28, 2024

Commit

f92376d

verified ·

1 Parent(s): 1d55f55

Create QueryMetadataExtractor.py

Browse files

Files changed (1) hide show

QueryMetadataExtractor.py +52 -0

QueryMetadataExtractor.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import json
+from typing import Dict, List
+from haystack import Pipeline, component
+from haystack.components.builders import PromptBuilder
+from haystack.components.generators import OpenAIGenerator
@component()
class QueryMetadataExtractor:
    """Haystack component that extracts metadata filters from a user query.

    An internal two-step pipeline (prompt builder -> OpenAI generator) asks
    the LLM to return, as JSON, the values in the query that correspond to
    the requested metadata fields. ``run`` converts that JSON object into a
    filter dictionary of the form
    ``{"operator": "AND", "conditions": [{"field": "meta.<key>", ...}]}``.
    """

    def __init__(self, model: str = "gpt-3.5-turbo"):
        """Build the internal prompt -> LLM pipeline.

        Args:
            model: Name of the OpenAI model passed to ``OpenAIGenerator``.
                Defaults to the previously hard-coded ``"gpt-3.5-turbo"``.
        """
        prompt = """
        You are part of an information system that processes users queries.
        Given a user query you extract information from it that matches a given list of metadata fields.
        The information to be extracted from the query must match the semantics associated with the given metadata fields.
        The information that you extracted from the query will then be used as filters to narrow down the search space
        when querying an index.
        Just include the value of the extracted metadata without including the name of the metadata field.
        The extracted information in 'Extracted metadata' must be returned as a valid JSON structure.
        ###
        Example 1:
        Query: "What was the revenue of Nvidia in 2022?"
        Metadata fields: {"company", "year"}
        Extracted metadata fields: {"company": "nvidia", "year": 2022}
        ###
        Example 2:
        Query: "What were the most influential publications in 2023 regarding Alzheimer's disease?"
        Metadata fields: {"disease", "year"}
        Extracted metadata fields: {"disease": "Alzheimer", "year": 2023}
        ###
        Example 3:
        Query: "{{query}}"
        Metadata fields: "{{metadata_fields}}"
        Extracted metadata fields:
        """
        self.pipeline = Pipeline()
        self.pipeline.add_component(name="builder", instance=PromptBuilder(prompt))
        self.pipeline.add_component(name="llm", instance=OpenAIGenerator(model=model))
        self.pipeline.connect("builder", "llm")

    @staticmethod
    def _parse_reply(reply: str) -> dict:
        """Parse the LLM reply into a JSON object (``dict``).

        Raises:
            json.JSONDecodeError: If no valid JSON can be recovered.
            ValueError: If the reply is valid JSON but not a JSON object.
        """
        text = reply.strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            # Chat models frequently wrap the payload in Markdown code fences
            # or add a short preamble; retry on the outermost {...} span.
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end < start:
                raise
            parsed = json.loads(text[start:end + 1])
        if not isinstance(parsed, dict):
            raise ValueError(
                f"Expected a JSON object with extracted metadata, got {type(parsed).__name__}"
            )
        return parsed

    # NOTE(review): the declared output type is narrower than the returned
    # value (a nested dict holding a list of conditions). It is left unchanged
    # so existing pipeline connections keep validating; consider Dict[str, Any].
    @component.output_types(filters=Dict[str, str])
    def run(self, query: str, metadata_fields: List[str]):
        """Extract metadata from ``query`` and return it as equality filters.

        Args:
            query: The user query to analyse.
            metadata_fields: Names of the metadata fields to look for.

        Returns:
            ``{"filters": {"operator": "AND", "conditions": [...]}}`` with one
            ``==`` condition per extracted field, each keyed as ``meta.<field>``.

        Raises:
            json.JSONDecodeError: If the LLM reply contains no parsable JSON.
            ValueError: If the LLM reply is JSON but not an object.
        """
        result = self.pipeline.run({'builder': {'query': query, 'metadata_fields': metadata_fields}})
        metadata = self._parse_reply(result['llm']['replies'][0])
        # this can be done with specific data structures and in a more sophisticated way
        conditions = [
            {"field": f"meta.{key}", "operator": "==", "value": value}
            for key, value in metadata.items()
            # A JSON null means the field was not found in the query; turning it
            # into an "== None" condition would wrongly restrict the search.
            if value is not None
        ]
        return {"filters": {"operator": "AND", "conditions": conditions}}