| from pinecone import Pinecone |
| from sentence_transformers import SentenceTransformer |
|
|
| class PineconeRetriever: |
| def __init__(self, api_key: str, index_name: str): |
| self.model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L3-v2") |
| self.pinecone = Pinecone(api_key=api_key) |
| self.index = self.pinecone.Index(index_name) |
|
|
| def retrieve(self, query: str, top_k: int = 5): |
| query_emb = self.model.encode(query).tolist() |
| results = self.index.query(vector=query_emb, top_k=top_k, include_metadata=True) |
| matches = results.get("matches", []) |
| docs = [] |
| for match in matches: |
| meta = match["metadata"] |
| docs.append({ |
| "content": meta.get("context", ""), |
| "page": meta.get("page"), |
| "score": match.get("score") |
| }) |
| return docs |
|
|