# NOTE(review): the original export carried Hugging Face Spaces page-status
# residue here ("Spaces: Sleeping") — replaced with this comment so the file parses.
from langchain_core.tools import tool
import pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os
from config import PINECONE_INDEX
from dotenv import load_dotenv

# Pull API keys from the local .env file into the process environment.
load_dotenv()

# NOTE: the Google key is stored under the env var FLASH_API, not GOOGLE_API_KEY.
GOOGLE_API_KEY = os.getenv("FLASH_API")
PINECONE_API = os.getenv("PINECONE_API_KEY")

# Embedding model used to vectorize user queries for semantic search.
google_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)

# Pinecone client and the index that holds the document-chunk vectors.
pc = pinecone.Pinecone(api_key=PINECONE_API)
index = pc.Index(PINECONE_INDEX)
def get_context(query: str) -> str:
    """
    Retrieve context for a query via semantic search over indexed document chunks.

    Embeds the query with the Google Generative AI embeddings model, then
    queries the Pinecone index for the top 15 matching chunks. Each match's
    metadata ("chunk" text and source "url") is folded into a single formatted
    string, with chunks numbered and separated by a '#' rule.

    NOTE(review): `tool` is imported at module level but this function is not
    decorated with @tool — confirm whether the decorator was lost in transit.

    Args:
        query (str): User query string used for semantic matching against the
            document index.

    Returns:
        str: A formatted string containing every matched chunk together with
        its source webpage URL; just the initial padding string when there are
        no matches.
    """
    embedding = google_embeddings.embed_query(query)
    search_results = index.query(
        vector=embedding,
        top_k=15,  # number of nearest chunks to retrieve
        include_metadata=True,
    )
    # Build the result with join instead of repeated string concatenation;
    # the leading single space preserves the original output prefix.
    parts = [" "]
    for count, match in enumerate(search_results["matches"], start=1):
        metadata = match["metadata"]
        parts.append(
            f"""
Chunk {count}:
{metadata.get("chunk")}
webpage_url: {metadata.get("url")}
#########################################
"""
        )
    return "".join(parts)