Spaces:

DeathBlade020
/

RAGTechniquesComparisonTool

Sleeping

App Files Files Community

RAGTechniquesComparisonTool / RagFusion.py

DeathBlade020

Upload 8 files

6c044be verified 10 months ago

raw

history blame contribute delete

3.37 kB

	import os
	from dotenv import load_dotenv
	from langchain.prompts import ChatPromptTemplate
	from langchain.load import dumps, loads
	from operator import itemgetter
	from langchain_core.output_parsers import StrOutputParser
	from langchain_openai import ChatOpenAI

	from helper import get_retriever

	load_dotenv()


	def reciprocal_rank_fusion(results: list[list], k=60):
	    """Merge several ranked document lists into one using Reciprocal Rank Fusion.

	    Every appearance of a document in one of the input lists adds
	    ``1 / (rank + k)`` to that document's total, where ``rank`` is the
	    document's zero-based position in that list. Documents are identified by
	    their serialized form (``dumps``), so two documents that serialize to the
	    same string share a single score.

	    A "Rank N - Score: S" line is printed for every fused result.

	    Args:
	        results: Ranked lists of documents, one list per query.
	        k: Constant added to the rank in the RRF denominator.

	    Returns:
	        A list of ``(document, fused_score)`` tuples ordered by descending
	        score; the documents are rebuilt from their serialized form with
	        ``loads``.
	    """
	    # Serialized document -> accumulated RRF score. Insertion order is the
	    # order of first appearance, which the stable sort below keeps for ties.
	    score_by_doc = {}
	    for ranked_docs in results:
	        for position, document in enumerate(ranked_docs):
	            serialized = dumps(document)
	            score_by_doc[serialized] = score_by_doc.get(serialized, 0) + 1 / (position + k)

	    # Highest fused score first.
	    ordered = sorted(score_by_doc.items(), key=itemgetter(1), reverse=True)
	    reranked_results = [(loads(serialized), total) for serialized, total in ordered]

	    for place, (_, total) in enumerate(reranked_results, start=1):
	        print(f"Rank {place} - Score: {total:.4f}")

	    return reranked_results


	def get_answer_using_rag_fusion(link: str, question: str):
	    """Answer ``question`` from the content behind ``link`` using RAG-Fusion.

	    The pipeline has three stages:

	    1. An LLM rewrites the question into several related search queries.
	    2. Each query is sent to the retriever returned by ``get_retriever(link)``
	       and the per-query result lists are merged with
	       ``reciprocal_rank_fusion``.
	    3. The fused results are inserted as ``{context}`` into the answer prompt
	       and the LLM produces the final answer.

	    Args:
	        link: Source location handed to ``get_retriever`` to build the retriever.
	        question: The user's question.

	    Returns:
	        The model's answer, as produced by ``StrOutputParser``.
	    """

	    def _split_queries(text):
	        # The model output may contain blank lines; drop them so that no empty
	        # query is ever sent to the retriever.
	        return [line.strip() for line in text.splitlines() if line.strip()]

	    # RAG-Fusion: Related
	    query_template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
	Generate multiple search queries related to: {question} \n
	Output (4 queries):"""
	    prompt_rag_fusion = ChatPromptTemplate.from_template(query_template)

	    generate_queries = (
	        prompt_rag_fusion
	        | ChatOpenAI(temperature=0)
	        | StrOutputParser()
	        | _split_queries
	    )

	    retriever = get_retriever(link)
	    # .map() runs the retriever once per generated query; the resulting list of
	    # ranked lists is what reciprocal_rank_fusion expects.
	    retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion

	    # NOTE: the retrieval chain is deliberately NOT invoked on its own here.
	    # It runs as part of final_rag_chain below; invoking it separately would
	    # repeat every LLM and retriever call and discard the result.

	    answer_template = """Answer the following question based on this context:

	{context}

	Question: {question}
	"""
	    prompt = ChatPromptTemplate.from_template(answer_template)
	    llm = ChatOpenAI(temperature=0)

	    final_rag_chain = (
	        {
	            "context": retrieval_chain_rag_fusion,
	            "question": itemgetter("question"),
	        }
	        | prompt
	        | llm
	        | StrOutputParser()
	    )

	    return final_rag_chain.invoke({"question": question})


	# if __name__ == "__main__":
	#     link = "https://lilianweng.github.io/posts/2023-06-23-agent/"
	#     question = "What is task decomposition for LLM agents?"
	#     answer = get_answer_using_rag_fusion(link, question)
	#     print(answer)