Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -173,7 +173,6 @@ class MultimodalQueryEngine(CustomQueryEngine):
|
|
| 173 |
# return Response(response=str(llm_response), source_nodes=nodes, metadata={"text_nodes": text_nodes, "image_nodes": image_nodes})
|
| 174 |
|
| 175 |
|
| 176 |
-
|
| 177 |
class MultimodalQueryEngine(CustomQueryEngine):
|
| 178 |
qa_prompt: PromptTemplate
|
| 179 |
retriever: BaseRetriever
|
|
@@ -203,7 +202,6 @@ class MultimodalQueryEngine(CustomQueryEngine):
|
|
| 203 |
nodes, query_bundle=QueryBundle(query_str)
|
| 204 |
)
|
| 205 |
|
| 206 |
-
|
| 207 |
# create image nodes from the image associated with those nodes
|
| 208 |
image_nodes = [
|
| 209 |
NodeWithScore(node=ImageNode(image_path=n.node.metadata["image_path"]))
|
|
@@ -216,17 +214,20 @@ class MultimodalQueryEngine(CustomQueryEngine):
|
|
| 216 |
)
|
| 217 |
|
| 218 |
# prompt for the LLM
|
| 219 |
-
fmt_prompt = self.qa_prompt.format(
|
|
|
|
|
|
|
| 220 |
|
| 221 |
# use the multimodal LLM to interpret images and generate a response to the prompt
|
| 222 |
-
|
| 223 |
prompt=fmt_prompt,
|
| 224 |
image_documents=[image_node.node for image_node in image_nodes],
|
| 225 |
)
|
|
|
|
| 226 |
return Response(
|
| 227 |
-
response=str(
|
| 228 |
source_nodes=nodes,
|
| 229 |
-
metadata={"text_nodes":
|
| 230 |
)
|
| 231 |
|
| 232 |
query_engine = MultimodalQueryEngine(QA_PROMPT, retriever, gpt_4o_mm)
|
|
|
|
| 173 |
# return Response(response=str(llm_response), source_nodes=nodes, metadata={"text_nodes": text_nodes, "image_nodes": image_nodes})
|
| 174 |
|
| 175 |
|
|
|
|
| 176 |
class MultimodalQueryEngine(CustomQueryEngine):
|
| 177 |
qa_prompt: PromptTemplate
|
| 178 |
retriever: BaseRetriever
|
|
|
|
| 202 |
nodes, query_bundle=QueryBundle(query_str)
|
| 203 |
)
|
| 204 |
|
|
|
|
| 205 |
# create image nodes from the image associated with those nodes
|
| 206 |
image_nodes = [
|
| 207 |
NodeWithScore(node=ImageNode(image_path=n.node.metadata["image_path"]))
|
|
|
|
| 214 |
)
|
| 215 |
|
| 216 |
# prompt for the LLM
|
| 217 |
+
fmt_prompt = self.qa_prompt.format(
|
| 218 |
+
context_str=ctx_str, query_str=query_str, encoded_image_url=encoded_image_url
|
| 219 |
+
)
|
| 220 |
|
| 221 |
# use the multimodal LLM to interpret images and generate a response to the prompt
|
| 222 |
+
llm_response = self.multi_modal_llm.complete(
|
| 223 |
prompt=fmt_prompt,
|
| 224 |
image_documents=[image_node.node for image_node in image_nodes],
|
| 225 |
)
|
| 226 |
+
|
| 227 |
return Response(
|
| 228 |
+
response=str(llm_response),
|
| 229 |
source_nodes=nodes,
|
| 230 |
+
metadata={"text_nodes": nodes, "image_nodes": image_nodes},
|
| 231 |
)
|
| 232 |
|
| 233 |
query_engine = MultimodalQueryEngine(QA_PROMPT, retriever, gpt_4o_mm)
|