Spaces:
Runtime error
Runtime error
| from operator import itemgetter | |
| from langchain_core.runnables import RunnableLambda, RunnablePassthrough | |
| from langchain_core.messages import HumanMessage | |
| from . import utility as ut | |
| from . import embedder as ed | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_openai import ChatOpenAI | |
| import os | |
| from dotenv import load_dotenv | |
# Load environment variables from a local .env file (if present) so the
# OpenAI SDK can pick up OPENAI_API_KEY from the process environment.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# SECURITY: never print the raw key — that leaks a secret into logs and
# console output. Only report whether it was found, so a misconfigured
# environment is still easy to diagnose.
if not OPENAI_API_KEY:
    print("WARNING: OPENAI_API_KEY is not set; ChatOpenAI calls will fail.")

# Deterministic (temperature=0) GPT-4o chat model used by the RAG chain below.
# ChatOpenAI reads OPENAI_API_KEY from the environment automatically.
chatgpt = ChatOpenAI(model_name='gpt-4o', temperature=0)
def multimodal_prompt_function(data_dict):
    """
    Build a single multimodal HumanMessage from text and image context.

    Expects `data_dict` to contain:
      - data_dict["context"]["texts"]: list of text/table snippets
      - data_dict["context"]["images"]: list of base64-encoded JPEG images
      - data_dict["question"]: the user's question

    The message content lists every image part first (as base64 data
    URIs), followed by one text part that embeds the joined context
    snippets and the user question into the analyst prompt for GPT-4o.
    """
    joined_context = "\n".join(data_dict["context"]["texts"])

    # One image_url part per base64-encoded image (JPEG data URI).
    content_parts = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{img}"},
        }
        for img in data_dict["context"]["images"]
    ]

    # Single text part carrying the instructions, question and context.
    content_parts.append(
        {
            "type": "text",
            "text": (
                f"""You are an analyst tasked with understanding detailed information
and trends from text documents,
data tables, and charts and graphs in images.
You will be given context information below which will be a mix of
text, tables, and images usually of charts or graphs.
Use this information to provide answers related to the user
question.
Do not make up answers, use the provided context documents below and
answer the question to the best of your ability.
User question:
{data_dict['question']}
Context documents:
{joined_context}
Answer:
"""
            ),
        }
    )
    return [HumanMessage(content=content_parts)]
# Core RAG chain (LCEL): map the incoming dict to the prompt-builder's
# expected keys, render the multimodal prompt, invoke GPT-4o, and parse
# the model reply down to a plain string.
multimodal_rag = (
    {
        # Retrieved context documents (text snippets + base64 images),
        # produced upstream by `retrieve_docs`.
        "context": itemgetter('context'),
        # The raw user query arrives under the 'input' key.
        "question": itemgetter('input'),
    }
    |
    RunnableLambda(multimodal_prompt_function)
    |
    chatgpt
    |
    StrOutputParser()
)
# Retrieval sub-chain: pull the query string out of the 'input' key,
# fetch matching document elements from the multi-vector retriever, then
# split them into text vs. base64-image buckets for the prompt builder.
# NOTE(review): assumes `ut.split_image_text_types` returns a dict with
# "texts" and "images" keys as consumed by multimodal_prompt_function —
# confirm against the helper's definition.
retrieve_docs = (itemgetter('input')
                 |
                 ed.retriever_multi_vector
                 |
                 RunnableLambda(ut.split_image_text_types))
# Chain `.assign` calls: starting from the input dict, successively add
# a "context" key (retrieved documents) and an "answer" key (GPT-4o
# response), each computed by a Runnable at invocation time. The final
# output therefore carries the retrieved sources alongside the answer.
multimodal_rag_w_sources = (RunnablePassthrough.assign(context=retrieve_docs)
                            .assign(answer=multimodal_rag)
                            )
# ------ direct testing -------
# Example usage:
#   response = multimodal_rag_w_sources.invoke({'input': query})