# RAG_MLM/ragMLM.py
import os
from operator import itemgetter

from dotenv import load_dotenv
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI

from . import embedder as ed
from . import utility as ut
# Load environment variables
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Fail fast if the key is missing, without printing the secret itself
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY is not set; add it to your .env file.")

chatgpt = ChatOpenAI(model="gpt-4o", temperature=0)
def multimodal_prompt_function(data_dict):
    """
    Create a multimodal prompt with both text and image context.

    This function formats the provided context from `data_dict`, which contains
    text, tables, and base64-encoded images. It joins the text (and table)
    portions and prepares the image(s) as base64-encoded `image_url` entries
    for inclusion in a message.

    The formatted text and images (context), along with the user question, are
    used to construct a prompt for GPT-4o.
    """
    formatted_texts = "\n".join(data_dict["context"]["texts"])
    messages = []

    # Adding image(s) to the messages if present
    if data_dict["context"]["images"]:
        for image in data_dict["context"]["images"]:
            image_message = {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{image}"},
            }
            messages.append(image_message)

    # Adding the text for analysis
    text_message = {
        "type": "text",
        "text": (
            f"""You are an analyst tasked with understanding detailed information
and trends from text documents, data tables, and charts and graphs in images.
You will be given context information below which will be a mix of text,
tables, and images usually of charts or graphs.
Use this information to provide answers related to the user question.
Do not make up answers, use the provided context documents below and
answer the question to the best of your ability.

User question:
{data_dict['question']}

Context documents:
{formatted_texts}

Answer:
"""
        ),
    }
    messages.append(text_message)
    return [HumanMessage(content=messages)]
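
# A minimal sketch of the input shape this function expects and what it
# returns; the sample values are hypothetical (in the real chain, the dict
# is assembled by the retrieval pipeline below):
#
#   multimodal_prompt_function({
#       "context": {"texts": ["Revenue grew 12% YoY."], "images": ["<base64>"]},
#       "question": "How did revenue change?",
#   })
#   # -> [HumanMessage(content=[{"type": "image_url", ...}, {"type": "text", ...}])]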
# Create RAG chain
multimodal_rag = (
    {
        "context": itemgetter("context"),
        "question": itemgetter("input"),
    }
    | RunnableLambda(multimodal_prompt_function)
    | chatgpt
    | StrOutputParser()
)
# Pass input query to retriever and get context document elements
retrieve_docs = (
    itemgetter("input")
    | ed.retriever_multi_vector
    | RunnableLambda(ut.split_image_text_types)
)
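
# Note: `ut.split_image_text_types` is expected to separate the retrieved
# elements into the dict shape the prompt function consumes, i.e.
# {"texts": [...], "images": [<base64>, ...]} (inferred from how
# `multimodal_prompt_function` above reads its "context" key).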
# Below, we chain `.assign` calls. This takes a dict and successively adds
# keys ("context", then "answer"), where the value for each key is computed
# by a Runnable (a function or chain executed at runtime). This keeps the
# retrieved context alongside the answer generated by GPT-4o.
multimodal_rag_w_sources = RunnablePassthrough.assign(context=retrieve_docs).assign(
    answer=multimodal_rag
)
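
# The resulting chain returns a dict carrying the original input plus the two
# assigned keys, roughly (values hypothetical):
#   {"input": <question string>,
#    "context": {"texts": [...], "images": [...]},
#    "answer": <GPT-4o answer string>}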
# ------ direct testing -------
# response = multimodal_rag_w_sources.invoke({'input': query})
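
# A minimal usage sketch, assuming the multi-vector retriever in `embedder`
# has already been built and indexed. Because this module uses relative
# imports, run it as a module (e.g. `python -m RAG_MLM.ragMLM`). The question
# below is a hypothetical example.
if __name__ == "__main__":
    query = "What trends do the charts in the report show?"  # hypothetical
    response = multimodal_rag_w_sources.invoke({"input": query})
    print(response["answer"])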