Spaces:
Paused
Paused
GPU enabled - small bug fix for LLM
Browse files
app.py
CHANGED
|
@@ -128,14 +128,11 @@ default_query_engine = load_RAG_pipeline(config)
|
|
| 128 |
|
| 129 |
# These are placeholder functions to simulate the behavior of the RAG setup.
|
| 130 |
# You would need to implement these with the actual logic to retrieve and generate answers based on the document.
|
| 131 |
-
def get_answer(question,
|
| 132 |
# Here you should implement the logic to generate an answer based on the question and the document.
|
| 133 |
# For example, you could use a machine learning model for RAG.
|
| 134 |
# answer = "This is a placeholder answer."
|
| 135 |
# https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/settings/#setting-local-configurations
|
| 136 |
-
|
| 137 |
-
# if temperature or nucleus sampling or max_tokens != as in config, recall query engine
|
| 138 |
-
|
| 139 |
response = query_engine.query(question)
|
| 140 |
print(f"A: {response}")
|
| 141 |
return response
|
|
@@ -153,8 +150,6 @@ def get_answer_page(response):
|
|
| 153 |
# Create the gr.Interface function
|
| 154 |
def ask_my_thesis(
|
| 155 |
question,
|
| 156 |
-
LLM=config["LLM"],
|
| 157 |
-
embeddings=config["embeddings"],
|
| 158 |
similarity_top_k=config["similarity_top_k"],
|
| 159 |
context_window=config["context_window"],
|
| 160 |
max_new_tokens=config["max_new_tokens"],
|
|
@@ -173,8 +168,8 @@ def ask_my_thesis(
|
|
| 173 |
temperature != config["temperature"]
|
| 174 |
or top_p != config["top_p"]
|
| 175 |
or max_new_tokens != config["max_new_tokens"]
|
| 176 |
-
or LLM != config["LLM"]
|
| 177 |
-
or embeddings != config["embeddings"]
|
| 178 |
or similarity_top_k != config["similarity_top_k"]
|
| 179 |
or context_window != config["context_window"]
|
| 180 |
or top_k != config["top_k"]
|
|
@@ -185,7 +180,7 @@ def ask_my_thesis(
|
|
| 185 |
config["temperature"] = temperature
|
| 186 |
config["top_p"] = top_p
|
| 187 |
config["max_new_tokens"] = max_new_tokens
|
| 188 |
-
config["LLM"] = LLM
|
| 189 |
# config["embeddings"] = embeddings
|
| 190 |
config["similarity_top_k"] = similarity_top_k
|
| 191 |
config["context_window"] = context_window
|
|
@@ -213,11 +208,11 @@ additional_inputs = [
|
|
| 213 |
# gr.Input("text", label="Question"),
|
| 214 |
# gr.Input("text", label="LLM", value=config["LLM"]),
|
| 215 |
# gr.Input("text", label="Embeddings", value=config["embeddings"]),
|
| 216 |
-
gr.Slider(1, 5, value=config["similarity_top_k"], label="Similarity Top K"),
|
| 217 |
gr.Slider(512, 8048, value=config["context_window"], label="Context Window"),
|
| 218 |
-
gr.Slider(20,
|
| 219 |
gr.Slider(0, 1, value=config["temperature"], label="Temperature"),
|
| 220 |
-
gr.Slider(1, 10, value=config["top_k"], label="Top K"),
|
| 221 |
gr.Slider(0, 1, value=config["top_p"], label="Nucleus Sampling"),
|
| 222 |
gr.Slider(128, 4024, value=config["chunk_size"], label="Chunk Size"),
|
| 223 |
gr.Slider(0, 200, value=config["chunk_overlap"], label="Chunk Overlap"),
|
|
|
|
| 128 |
|
| 129 |
# These are placeholder functions to simulate the behavior of the RAG setup.
|
| 130 |
# You would need to implement these with the actual logic to retrieve and generate answers based on the document.
|
| 131 |
+
def get_answer(question, query_engine=default_query_engine):
|
| 132 |
# Here you should implement the logic to generate an answer based on the question and the document.
|
| 133 |
# For example, you could use a machine learning model for RAG.
|
| 134 |
# answer = "This is a placeholder answer."
|
| 135 |
# https://docs.llamaindex.ai/en/stable/module_guides/supporting_modules/settings/#setting-local-configurations
|
|
|
|
|
|
|
|
|
|
| 136 |
response = query_engine.query(question)
|
| 137 |
print(f"A: {response}")
|
| 138 |
return response
|
|
|
|
| 150 |
# Create the gr.Interface function
|
| 151 |
def ask_my_thesis(
|
| 152 |
question,
|
|
|
|
|
|
|
| 153 |
similarity_top_k=config["similarity_top_k"],
|
| 154 |
context_window=config["context_window"],
|
| 155 |
max_new_tokens=config["max_new_tokens"],
|
|
|
|
| 168 |
temperature != config["temperature"]
|
| 169 |
or top_p != config["top_p"]
|
| 170 |
or max_new_tokens != config["max_new_tokens"]
|
| 171 |
+
# or LLM != config["LLM"]
|
| 172 |
+
# or embeddings != config["embeddings"]
|
| 173 |
or similarity_top_k != config["similarity_top_k"]
|
| 174 |
or context_window != config["context_window"]
|
| 175 |
or top_k != config["top_k"]
|
|
|
|
| 180 |
config["temperature"] = temperature
|
| 181 |
config["top_p"] = top_p
|
| 182 |
config["max_new_tokens"] = max_new_tokens
|
| 183 |
+
# config["LLM"] = LLM
|
| 184 |
# config["embeddings"] = embeddings
|
| 185 |
config["similarity_top_k"] = similarity_top_k
|
| 186 |
config["context_window"] = context_window
|
|
|
|
| 208 |
# gr.Input("text", label="Question"),
|
| 209 |
# gr.Input("text", label="LLM", value=config["LLM"]),
|
| 210 |
# gr.Input("text", label="Embeddings", value=config["embeddings"]),
|
| 211 |
+
gr.Slider(1, 5, value=config["similarity_top_k"], label="Similarity Top K", step=1),
|
| 212 |
gr.Slider(512, 8048, value=config["context_window"], label="Context Window"),
|
| 213 |
+
gr.Slider(20, 500, value=config["max_new_tokens"], label="Max New Tokens"),
|
| 214 |
gr.Slider(0, 1, value=config["temperature"], label="Temperature"),
|
| 215 |
+
gr.Slider(1, 10, value=config["top_k"], label="Top K", step=1),
|
| 216 |
gr.Slider(0, 1, value=config["top_p"], label="Nucleus Sampling"),
|
| 217 |
gr.Slider(128, 4024, value=config["chunk_size"], label="Chunk Size"),
|
| 218 |
gr.Slider(0, 200, value=config["chunk_overlap"], label="Chunk Overlap"),
|