Spaces:

youdata-ai
/

mea_chatbot

Sleeping

App Files Files Community

akshansh36 committed on Aug 29, 2025

Commit

cd09618

verified ·

1 Parent(s): 1b32752

Create utils/tools.py

Browse files

Files changed (1) hide show

src/utils/tools.py +72 -0

src/utils/tools.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from langchain_core.tools import tool
+import pinecone
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+import os
+from config import PINECONE_INDEX
+from dotenv import load_dotenv
+load_dotenv()
+GOOGLE_API_KEY = os.getenv("FLASH_API")
+PINECONE_API = os.getenv("PINECONE_API_KEY")
+google_embeddings = GoogleGenerativeAIEmbeddings(
+    model="models/embedding-001",  # Correct model name
+    google_api_key=GOOGLE_API_KEY
+)
+pc = pinecone.Pinecone(
+    api_key=PINECONE_API
+)
+index = pc.Index(PINECONE_INDEX)
+@tool
+def get_context(query: str) -> str:
+    """
+    Retrieve context information by performing a semantic search on indexed document chunks.
+    This tool embeds the provided user query using a Google Generative AI embeddings model,
+    then queries a Pinecone index to fetch the top 10 matching document chunks. Each match
+    includes metadata such as the text chunk, starting page, ending page, and the source PDF URL.
+    The function aggregates these details into a formatted string.
+    Args:
+        query (str): A user query search string used for semantic matching against the document index.
+    Returns:
+        str: A formatted string containing the matched document chunks along with their associated metadata,
+             including start page, end page, and PDF URL.
+    """
+    embedding = google_embeddings.embed_query(query)
+    search_results = index.query(
+        vector=embedding,
+        top_k=15,  # Retrieve top 10 results
+        include_metadata=True
+    )
+    print(search_results)
+    context = " "
+    count = 1
+    for match in search_results["matches"]:
+        chunk = match["metadata"].get("chunk")
+        url = match["metadata"].get("url")
+        context += f"""
+        Chunk {count}:
+        {chunk}
+        webpage_url: {url}
+        #########################################
+        """
+        count += 1
+    return context