Spaces:

youdata-ai
/

rites-pdf

Sleeping

App Files Community

akshansh36 committed on Feb 14, 2025

Commit

9bd3750

verified ·

1 Parent(s): 7d4344b

Update tools.py

Browse files

Files changed (1) hide show

tools.py +73 -73

tools.py CHANGED Viewed

@@ -1,73 +1,73 @@
-from langchain_core.tools import tool
-import pinecone
-from langchain_google_genai import GoogleGenerativeAIEmbeddings
-import os
-from dotenv import load_dotenv
-load_dotenv()
-GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
-PINECONE_API = os.getenv("PINECONE_API_KEY")
-google_embeddings = GoogleGenerativeAIEmbeddings(
-    model="models/embedding-001",  # Correct model name
-    google_api_key=GOOGLE_API_KEY
-)
-pc = pinecone.Pinecone(
-    api_key=PINECONE_API
-)
-PINECONE_INDEX = "rites-pdf"
-index = pc.Index(PINECONE_INDEX)
-@tool
-def get_context(query: str) -> str:
-    """
-    Retrieve context information by performing a semantic search on indexed document chunks.
-    This tool embeds the provided user query using a Google Generative AI embeddings model,
-    then queries a Pinecone index to fetch the top 10 matching document chunks. Each match
-    includes metadata such as the text chunk, starting page, ending page, and the source PDF URL.
-    The function aggregates these details into a formatted string.
-    Args:
-        query (str): A user query search string used for semantic matching against the document index.
-    Returns:
-        str: A formatted string containing the matched document chunks along with their associated metadata,
-             including start page, end page, and PDF URL.
-    """
-    embedding = google_embeddings.embed_query(query)
-    search_results = index.query(
-        vector=embedding,
-        top_k=10,  # Retrieve top 10 results
-        include_metadata=True
-    )
-    context = " "
-    count = 1
-    for match in search_results["matches"]:
-        chunk = match["metadata"].get("chunk")
-        url = match["metadata"].get("pdf_url")
-        start_page = match["metadata"].get("start_page")
-        end_page = match["metadata"].get("end_page")
-        context += f"""
-        Chunk {count}:
-        {chunk}
-        start_page: {start_page}
-        end_page: {end_page}
-        pdf_url: {url}
-        #########################################
-        """
-        count += 1
-    return context

+from langchain_core.tools import tool
+import pinecone
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+import os
+from dotenv import load_dotenv
+load_dotenv()
+GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
+PINECONE_API = os.getenv("PINECONE_API_KEY")
+google_embeddings = GoogleGenerativeAIEmbeddings(
+    model="models/embedding-001",  # Correct model name
+    google_api_key=GOOGLE_API_KEY
+)
+pc = pinecone.Pinecone(
+    api_key=PINECONE_API
+)
+PINECONE_INDEX = "rites-pdf"
+index = pc.Index(PINECONE_INDEX)
+@tool
+def get_context(query: str) -> str:
+    """
+    Retrieve context information by performing a semantic search on indexed document chunks.
+    This tool embeds the provided user query using a Google Generative AI embeddings model,
+    then queries a Pinecone index to fetch the top 20 matching document chunks. Each match
+    includes metadata such as the text chunk, starting page, ending page, and the source PDF URL.
+    The function aggregates these details into a formatted string.
+    Args:
+        query (str): A user query search string used for semantic matching against the document index.
+    Returns:
+        str: A formatted string containing the matched document chunks along with their associated metadata,
+             including start page, end page, and PDF URL.
+    """
+    embedding = google_embeddings.embed_query(query)
+    search_results = index.query(
+        vector=embedding,
+        top_k=20,  # Retrieve top 20 results
+        include_metadata=True
+    )
+    context = " "
+    count = 1
+    for match in search_results["matches"]:
+        chunk = match["metadata"].get("chunk")
+        url = match["metadata"].get("pdf_url")
+        start_page = match["metadata"].get("start_page")
+        end_page = match["metadata"].get("end_page")
+        context += f"""
+        Chunk {count}:
+        {chunk}
+        start_page: {start_page}
+        end_page: {end_page}
+        pdf_url: {url}
+        #########################################
+        """
+        count += 1
+    return context