Spaces:
Runtime error
Runtime error
Update ragchain.py
Browse files- ragchain.py +69 -10
ragchain.py
CHANGED
|
@@ -7,6 +7,47 @@ class RAGChain:
|
|
| 7 |
self.llm = llm
|
| 8 |
self.vector_store = vector_store
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
def predict_library_usage(self, query):
|
| 11 |
"""
|
| 12 |
Use the LLM to predict the relevant library for the user's query.
|
|
@@ -39,8 +80,8 @@ class RAGChain:
|
|
| 39 |
"""
|
| 40 |
Format the retrieved document and code contexts.
|
| 41 |
"""
|
| 42 |
-
doc_context =
|
| 43 |
-
code_context =
|
| 44 |
|
| 45 |
return doc_context, code_context
|
| 46 |
|
|
@@ -74,13 +115,31 @@ class RAGChain:
|
|
| 74 |
"""
|
| 75 |
return self.llm.invoke(prompt).content
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
library_usage_prediction = self.predict_library_usage(query)
|
| 82 |
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
return
|
|
|
|
| 7 |
self.llm = llm
|
| 8 |
self.vector_store = vector_store
|
| 9 |
|
| 10 |
+
|
| 11 |
+
def rewrite_query(self, query):
    """
    Rewrite the user's query to align with the language and structure of the library's methods and documentation.

    The query is embedded in an instruction prompt (guidelines plus
    few-shot examples) and sent to ``self.llm.invoke``.

    Args:
        query: The user's original, free-form request.

    Returns:
        The LLM's rewritten query with surrounding whitespace stripped.
        If the LLM answers with nothing but whitespace, the original
        ``query`` is returned unchanged so callers never receive an empty
        search string.
    """
    # Function-scope import keeps this block self-contained.
    import textwrap

    # The instructions are kept separate from the user text so that
    # dedenting never touches (or is defeated by) the query's own content.
    instructions = textwrap.dedent(
        """\
        You are an intelligent assistant that helps users rewrite their queries.
        The vectorstore consists of the source code and documentation of a Python library, which enables users to
        programmatically interact with a REST-like API of a software system. The library methods have descriptive
        docstrings. Your task is to rewrite the query in a way that aligns with the language and structure of the
        library's methods and documentation, ensuring optimal retrieval of relevant information.

        Guidelines for rewriting the query:
        1. Identify the main action the user wants to perform (e.g., "Upload a file to a record," "Get users of a group").
        2. Remove conversational elements like greetings or pleasantries (e.g., "Hello Chatbot", "I need you to help me with").
        3. Exclude specific variable values (e.g., "ID of my record is '31'") unless essential to the intent.
        4. Rephrase the query to match the format and keywords used in the docstrings, focusing on verbs and objects relevant to the action (e.g., "Add a record to a collection").
        5. Given the query the user might need more than one action to achieve his goal. In this case the rewritten query has more than one action.

        Examples:
        - User query: "Create a Python script with a method that facilitates the creation of records. This method should accept an array of identifiers as a parameter and allow metadata to be added to each record."
        - Rewritten query: "create records, add metadata to record"
        - User query: "Hi, can you help me write Python code to add a record to a collection? The record ID is '45', and the collection ID is '12'."
        Rewritten query: "add a record to a collection"
        - User query: I need a python script with which i create a new record with the title: "Hello World" and then link the record to a given collection.
        Rewritten query: "create a new record with title" , "link a record to a collection"

        Based on these examples and guidelines, rewrite the following user query to align more effectively with the keywords used in the docstrings.
        Do not include any additional comments, explanations, or text.

        Original query:
        """
    )
    rewrite_prompt = f"{instructions}{query}\n"

    response = self.llm.invoke(rewrite_prompt)
    # Message-style replies expose the text via ``.content``; an LLM whose
    # ``invoke`` returns a plain string is accepted as well.
    reply_text = response if isinstance(response, str) else response.content
    rewritten_query = reply_text.strip()

    # A blank rewrite carries no search intent; keep the user's wording.
    return rewritten_query or query
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
def predict_library_usage(self, query):
|
| 52 |
"""
|
| 53 |
Use the LLM to predict the relevant library for the user's query.
|
|
|
|
| 80 |
"""
|
| 81 |
Format the retrieved document and code contexts.
|
| 82 |
"""
|
| 83 |
+
doc_context = _format_kadi_api_doc_context(doc_contexts)
|
| 84 |
+
code_context = _format_kadi_apy_library_context(code_contexts)
|
| 85 |
|
| 86 |
return doc_context, code_context
|
| 87 |
|
|
|
|
| 115 |
"""
|
| 116 |
return self.llm.invoke(prompt).content
|
| 117 |
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def _format_kadi_apy_library_context(docs):
|
| 121 |
+
doc_context = []
|
|
|
|
| 122 |
|
| 123 |
+
for doc in docs:
|
| 124 |
+
# Extract metadata information
|
| 125 |
+
class_info = doc.metadata.get("class", "Unknown Class")
|
| 126 |
+
type_info = doc.metadata.get("type", "Unknown Type")
|
| 127 |
+
source_info = doc.metadata.get("source", "Unknown Type")
|
| 128 |
+
|
| 129 |
+
print(":}\n\n", doc.page_content)
|
| 130 |
+
formatted_doc = f"# source: {source_info}\n# class: {class_info}\n# type: {type_info}\n{doc.page_content}\n\n\n"
|
| 131 |
+
doc_context.append(formatted_doc)
|
| 132 |
+
|
| 133 |
+
return doc_context
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _format_kadi_api_doc_context(docs):
|
| 137 |
+
doc_context = []
|
| 138 |
+
|
| 139 |
+
for doc in docs:
|
| 140 |
+
source_info = doc.metadata.get("source", "Unknown Type")
|
| 141 |
+
print(":}\n\n", doc.page_content)
|
| 142 |
+
formatted_doc = f"# source: {source_info}\n{doc.page_content}\n\n\n"
|
| 143 |
+
doc_context.append(formatted_doc)
|
| 144 |
|
| 145 |
+
return doc_context
|