# NOTE(review): the original export carried Hugging Face Spaces page-status
# residue here ("Spaces: Sleeping") — replaced with this comment so the file parses.
from langchain_core.tools import tool
import pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os
from config import PINECONE_INDEX
from dotenv import load_dotenv

# Pull API keys from the local .env file into the process environment.
load_dotenv()

# NOTE: the Google key is stored under the env var FLASH_API, not GOOGLE_API_KEY.
GOOGLE_API_KEY = os.getenv("FLASH_API")
PINECONE_API = os.getenv("PINECONE_API_KEY")

# Embedding model used to vectorize user queries for semantic search.
google_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

# Pinecone client and the index that holds the document-chunk vectors.
pc = pinecone.Pinecone(api_key=PINECONE_API)
index = pc.Index(PINECONE_INDEX)
def get_context(query: str) -> str:
    """
    Retrieve context for a query via semantic search over indexed document chunks.

    Embeds the query with the Google Generative AI embeddings model, then
    queries the Pinecone index for the top 15 matching chunks. Each match's
    metadata ("chunk" text and source "url") is folded into a single formatted
    string, with chunks numbered and separated by a '#' rule.

    NOTE(review): `tool` is imported at module level but this function is not
    decorated with @tool — confirm whether the decorator was lost in transit.

    Args:
        query (str): User query string used for semantic matching against the
            document index.

    Returns:
        str: A formatted string containing every matched chunk together with
        its source webpage URL; just the initial padding string when there are
        no matches.
    """
    embedding = google_embeddings.embed_query(query)
    search_results = index.query(
        vector=embedding,
        top_k=15,  # number of nearest chunks to retrieve
        include_metadata=True,
    )
    # Build the result with join instead of repeated string concatenation;
    # the leading single space preserves the original output prefix.
    parts = [" "]
    for count, match in enumerate(search_results["matches"], start=1):
        metadata = match["metadata"]
        parts.append(
            f"""
Chunk {count}:
{metadata.get("chunk")}
webpage_url: {metadata.get("url")}
#########################################
"""
        )
    return "".join(parts)