working
Browse files
app.py
CHANGED
|
@@ -40,7 +40,7 @@ else:
|
|
| 40 |
joblib.dump(docs_processed, cache_file)
|
| 41 |
print("Created and saved docs_processed to cache.")
|
| 42 |
|
| 43 |
-
class RetrieverTool(Tool):
|
| 44 |
name = "retriever"
|
| 45 |
description = "Uses semantic search to retrieve the parts of documentation that could be most relevant to answer your query."
|
| 46 |
inputs = {
|
|
@@ -52,14 +52,14 @@ class RetrieverTool(Tool):
|
|
| 52 |
output_type = "string"
|
| 53 |
|
| 54 |
def __init__(self, docs, **kwargs):
|
| 55 |
-
super().__init__(**kwargs)
|
| 56 |
|
| 57 |
self.retriever = BM25Retriever.from_documents(
|
| 58 |
docs,
|
| 59 |
k=7,
|
| 60 |
)
|
| 61 |
|
| 62 |
-
def forward(self, query: str) -> str:
|
| 63 |
assert isinstance(query, str), "Your search query must be a string"
|
| 64 |
|
| 65 |
docs = self.retriever.invoke(
|
|
@@ -72,6 +72,8 @@ class RetrieverTool(Tool):
|
|
| 72 |
]
|
| 73 |
)
|
| 74 |
|
|
|
|
|
|
|
| 75 |
retriever_tool = RetrieverTool(docs_processed)
|
| 76 |
# Download gguf model files
|
| 77 |
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
|
|
@@ -85,7 +87,7 @@ hf_hub_download(
|
|
| 85 |
t5_size="base"
|
| 86 |
hf_hub_download(
|
| 87 |
repo_id=f"Felladrin/gguf-flan-t5-{t5_size}",
|
| 88 |
-
filename=f"flan-t5-{t5_size}.Q8_0.gguf",
|
| 89 |
local_dir="./models",
|
| 90 |
)
|
| 91 |
|
|
@@ -159,6 +161,7 @@ def generate_t5(llama,message):#text size must be smaller than ctx(default=512)
|
|
| 159 |
return None
|
| 160 |
|
| 161 |
|
|
|
|
| 162 |
def to_query(question):
|
| 163 |
system = """
|
| 164 |
You are a query rewriter. Your task is to convert a user's question into a concise search query suitable for information retrieval.
|
|
@@ -200,6 +203,17 @@ Search Query:
|
|
| 200 |
return None
|
| 201 |
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
def answer(document:str,question:str,model:str="Qwen2.5-0.5B-Rag-Thinking.i1-Q6_K.gguf")->str:
|
| 204 |
global llm
|
| 205 |
global llm_model
|
|
@@ -251,7 +265,9 @@ def respond(
|
|
| 251 |
if model is None:#
|
| 252 |
return
|
| 253 |
|
| 254 |
-
|
|
|
|
|
|
|
| 255 |
|
| 256 |
# Create a chat interface
|
| 257 |
demo = gr.ChatInterface(
|
|
|
|
| 40 |
joblib.dump(docs_processed, cache_file)
|
| 41 |
print("Created and saved docs_processed to cache.")
|
| 42 |
|
| 43 |
+
class RetrieverTool():
|
| 44 |
name = "retriever"
|
| 45 |
description = "Uses semantic search to retrieve the parts of documentation that could be most relevant to answer your query."
|
| 46 |
inputs = {
|
|
|
|
| 52 |
output_type = "string"
|
| 53 |
|
| 54 |
def __init__(self, docs, **kwargs):
|
| 55 |
+
#super().__init__(**kwargs)
|
| 56 |
|
| 57 |
self.retriever = BM25Retriever.from_documents(
|
| 58 |
docs,
|
| 59 |
k=7,
|
| 60 |
)
|
| 61 |
|
| 62 |
+
def __call__(self, query: str) -> str:
|
| 63 |
assert isinstance(query, str), "Your search query must be a string"
|
| 64 |
|
| 65 |
docs = self.retriever.invoke(
|
|
|
|
| 72 |
]
|
| 73 |
)
|
| 74 |
|
| 75 |
+
|
| 76 |
+
|
| 77 |
retriever_tool = RetrieverTool(docs_processed)
|
| 78 |
# Download gguf model files
|
| 79 |
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
|
|
|
|
| 87 |
t5_size="base"
|
| 88 |
hf_hub_download(
|
| 89 |
repo_id=f"Felladrin/gguf-flan-t5-{t5_size}",
|
| 90 |
+
filename=f"flan-t5-{t5_size}.Q8_0.gguf",
|
| 91 |
local_dir="./models",
|
| 92 |
)
|
| 93 |
|
|
|
|
| 161 |
return None
|
| 162 |
|
| 163 |
|
| 164 |
+
llama = None
|
| 165 |
def to_query(question):
|
| 166 |
system = """
|
| 167 |
You are a query rewriter. Your task is to convert a user's question into a concise search query suitable for information retrieval.
|
|
|
|
| 203 |
return None
|
| 204 |
|
| 205 |
|
| 206 |
+
qwen_prompt = """<|im_start|>system
|
| 207 |
+
You answer questions from the user, always using the context provided as a basis.
|
| 208 |
+
Write down your reasoning for answering the question, between the <think> and </think> tags.<|im_end|>
|
| 209 |
+
<|im_start|>user
|
| 210 |
+
Context:
|
| 211 |
+
%s
|
| 212 |
+
Question:
|
| 213 |
+
%s<|im_end|>
|
| 214 |
+
<|im_start|>assistant
|
| 215 |
+
<think>"""
|
| 216 |
+
|
| 217 |
def answer(document:str,question:str,model:str="Qwen2.5-0.5B-Rag-Thinking.i1-Q6_K.gguf")->str:
|
| 218 |
global llm
|
| 219 |
global llm_model
|
|
|
|
| 265 |
if model is None:#
|
| 266 |
return
|
| 267 |
|
| 268 |
+
query = to_query(message)
|
| 269 |
+
document = retriever_tool(query=query)
|
| 270 |
+
return answer(document,message)
|
| 271 |
|
| 272 |
# Create a chat interface
|
| 273 |
demo = gr.ChatInterface(
|