More minor bug fixes
Browse files
- requirements.txt +3 -1
- src/embedder.py +2 -2
- src/pinecone_store.py +13 -16
requirements.txt
CHANGED
|
@@ -11,4 +11,6 @@ numpy
|
|
| 11 |
python-dotenv
|
| 12 |
together
|
| 13 |
einops
|
| 14 |
-
opencv-python
|
|
|
|
|
|
|
|
|
| 11 |
python-dotenv
|
| 12 |
together
|
| 13 |
einops
|
| 14 |
+
opencv-python
|
| 15 |
+
timm
|
| 16 |
+
json
|
src/embedder.py
CHANGED
|
@@ -6,8 +6,8 @@ import decord
|
|
| 6 |
class InternVLEmbedder:
|
| 7 |
def __init__(self):
|
| 8 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 9 |
-
self.model = AutoModel.from_pretrained("OpenGVLab/InternVL2_5-
|
| 10 |
-
self.processor = AutoProcessor.from_pretrained("OpenGVLab/InternVL2_5-
|
| 11 |
|
| 12 |
def embed_video(self, video_path):
|
| 13 |
vr = decord.VideoReader(video_path)
|
|
|
|
| 6 |
class InternVLEmbedder:
|
| 7 |
def __init__(self):
|
| 8 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 9 |
+
self.model = AutoModel.from_pretrained("OpenGVLab/InternVL2_5-1B-MPO", trust_remote_code=True).to(self.device)
|
| 10 |
+
self.processor = AutoProcessor.from_pretrained("OpenGVLab/InternVL2_5-1B-MPO", trust_remote_code=True)
|
| 11 |
|
| 12 |
def embed_video(self, video_path):
|
| 13 |
vr = decord.VideoReader(video_path)
|
src/pinecone_store.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import
|
| 2 |
import os
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
|
|
@@ -6,27 +6,24 @@ load_dotenv()
|
|
| 6 |
|
| 7 |
class PineconeStore:
|
| 8 |
def __init__(self):
|
| 9 |
-
|
| 10 |
-
environment = os.getenv("PINECONE_ENV")
|
| 11 |
-
pinecone.init(api_key=api_key, environment=environment)
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
)
|
| 20 |
-
|
|
|
|
| 21 |
|
| 22 |
def upsert(self, id, vector, metadata):
|
| 23 |
self.index.upsert([(id, vector.tolist(), metadata)])
|
| 24 |
|
| 25 |
def query(self, vector, filter_key, top_k):
|
|
|
|
| 26 |
return [
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
top_k=top_k,
|
| 30 |
-
include_metadata=True
|
| 31 |
-
)["matches"] if filter_key in m["id"]
|
| 32 |
]
|
|
|
|
| 1 |
+
from pinecone import Pinecone, ServerlessSpec
|
| 2 |
import os
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
|
|
|
|
| 6 |
|
| 7 |
class PineconeStore:
|
| 8 |
def __init__(self):
|
| 9 |
+
self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
if 'blip-soccer-highlights' not in self.pc.list_indexes().names():
|
| 12 |
+
self.pc.create_index(
|
| 13 |
+
name='blip-soccer-highlights',
|
| 14 |
+
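dimension=1024, # Must match the embedding size of the model loaded in src/embedder.py (InternVL2_5-1B-MPO) - TODO confirm 1024 is correct for that model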
|
| 15 |
+
metric='cosine',
|
| 16 |
+
spec=ServerlessSpec(cloud='aws', region=os.getenv("PINECONE_ENV", "us-east-1"))
|
| 17 |
)
|
| 18 |
+
|
| 19 |
+
self.index = self.pc.Index('blip-soccer-highlights')
|
| 20 |
|
| 21 |
def upsert(self, id, vector, metadata):
|
| 22 |
self.index.upsert([(id, vector.tolist(), metadata)])
|
| 23 |
|
| 24 |
def query(self, vector, filter_key, top_k):
|
| 25 |
+
results = self.index.query(vector.tolist(), top_k=top_k, include_metadata=True)
|
| 26 |
return [
|
| 27 |
+
match["metadata"] for match in results["matches"]
|
| 28 |
+
if filter_key in match["id"]
|
|
|
|
|
|
|
|
|
|
| 29 |
]
|