Spaces:
Runtime error
Runtime error
Commit
·
21537b7
1
Parent(s):
694a076
refactor: colpali retrieval
Browse files
docs/retreival/{multi_modal_retrieval.md → colpali.md}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
# Multi-Modal Retrieval
|
| 2 |
|
| 3 |
-
::: medrag_multi_modal.retrieval.multi_modal_retrieval
|
|
|
|
| 1 |
# Multi-Modal Retrieval
|
| 2 |
|
| 3 |
+
::: medrag_multi_modal.retrieval.colpali_retrieval
|
medrag_multi_modal/retrieval/{multi_modal_retrieval.py → colpali_retrieval.py}
RENAMED
|
@@ -9,9 +9,9 @@ from PIL import Image
|
|
| 9 |
from ..utils import get_wandb_artifact
|
| 10 |
|
| 11 |
|
| 12 |
-
class MultiModalRetriever(weave.Model):
|
| 13 |
"""
|
| 14 |
-
|
| 15 |
|
| 16 |
This class leverages the `byaldi.RAGMultiModalModel` to perform document retrieval tasks.
|
| 17 |
It can be initialized with a pre-trained model or from a specified W&B artifact. The class
|
|
@@ -20,10 +20,10 @@ class MultiModalRetriever(weave.Model):
|
|
| 20 |
!!! example "Indexing Data"
|
| 21 |
```python
|
| 22 |
import wandb
|
| 23 |
-
from medrag_multi_modal.retrieval import MultiModalRetriever
|
| 24 |
|
| 25 |
wandb.init(project="medrag-multi-modal", entity="ml-colabs", job_type="index")
|
| 26 |
-
retriever = MultiModalRetriever()
|
| 27 |
retriever.index(
|
| 28 |
data_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy-images:v1",
|
| 29 |
weave_dataset_name="grays-anatomy-images:v0",
|
|
@@ -36,10 +36,10 @@ class MultiModalRetriever(weave.Model):
|
|
| 36 |
import weave
|
| 37 |
|
| 38 |
import wandb
|
| 39 |
-
from medrag_multi_modal.retrieval import MultiModalRetriever
|
| 40 |
|
| 41 |
weave.init(project_name="ml-colabs/medrag-multi-modal")
|
| 42 |
-
retriever = MultiModalRetriever.from_artifact(
|
| 43 |
index_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy:v0",
|
| 44 |
metadata_dataset_name="grays-anatomy-images:v0",
|
| 45 |
data_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy-images:v1",
|
|
|
|
| 9 |
from ..utils import get_wandb_artifact
|
| 10 |
|
| 11 |
|
| 12 |
+
class CalPaliRetriever(weave.Model):
|
| 13 |
"""
|
| 14 |
+
CalPaliRetriever is a class that facilitates the retrieval of page images using ColPali.
|
| 15 |
|
| 16 |
This class leverages the `byaldi.RAGMultiModalModel` to perform document retrieval tasks.
|
| 17 |
It can be initialized with a pre-trained model or from a specified W&B artifact. The class
|
|
|
|
| 20 |
!!! example "Indexing Data"
|
| 21 |
```python
|
| 22 |
import wandb
|
| 23 |
+
from medrag_multi_modal.retrieval import CalPaliRetriever
|
| 24 |
|
| 25 |
wandb.init(project="medrag-multi-modal", entity="ml-colabs", job_type="index")
|
| 26 |
+
retriever = CalPaliRetriever()
|
| 27 |
retriever.index(
|
| 28 |
data_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy-images:v1",
|
| 29 |
weave_dataset_name="grays-anatomy-images:v0",
|
|
|
|
| 36 |
import weave
|
| 37 |
|
| 38 |
import wandb
|
| 39 |
+
from medrag_multi_modal.retrieval import CalPaliRetriever
|
| 40 |
|
| 41 |
weave.init(project_name="ml-colabs/medrag-multi-modal")
|
| 42 |
+
retriever = CalPaliRetriever.from_artifact(
|
| 43 |
index_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy:v0",
|
| 44 |
metadata_dataset_name="grays-anatomy-images:v0",
|
| 45 |
data_artifact_name="ml-colabs/medrag-multi-modal/grays-anatomy-images:v1",
|
mkdocs.yml
CHANGED
|
@@ -72,6 +72,6 @@ nav:
|
|
| 72 |
- Image Loader: 'document_loader/load_image.md'
|
| 73 |
- Chunking: 'chunking.md'
|
| 74 |
- Retrieval:
|
| 75 |
-
-
|
| 76 |
|
| 77 |
repo_url: https://github.com/soumik12345/medrag-multi-modal
|
|
|
|
| 72 |
- Image Loader: 'document_loader/load_image.md'
|
| 73 |
- Chunking: 'chunking.md'
|
| 74 |
- Retrieval:
|
| 75 |
+
- ColPali Retrieval: 'retreival/colpali.md'
|
| 76 |
|
| 77 |
repo_url: https://github.com/soumik12345/medrag-multi-modal
|
pyproject.toml
CHANGED
|
@@ -5,6 +5,7 @@ description = ""
|
|
| 5 |
readme = "README.md"
|
| 6 |
requires-python = ">=3.10"
|
| 7 |
dependencies = [
|
|
|
|
| 8 |
"Byaldi>=0.0.5",
|
| 9 |
"firerequests>=0.0.7",
|
| 10 |
"pdf2image>=1.17.0",
|
|
@@ -35,6 +36,7 @@ dependencies = [
|
|
| 35 |
|
| 36 |
[project.optional-dependencies]
|
| 37 |
core = [
|
|
|
|
| 38 |
"Byaldi>=0.0.5",
|
| 39 |
"firerequests>=0.0.7",
|
| 40 |
"marker-pdf>=0.2.17",
|
|
|
|
| 5 |
readme = "README.md"
|
| 6 |
requires-python = ">=3.10"
|
| 7 |
dependencies = [
|
| 8 |
+
"bm25s[full]>=0.2.2",
|
| 9 |
"Byaldi>=0.0.5",
|
| 10 |
"firerequests>=0.0.7",
|
| 11 |
"pdf2image>=1.17.0",
|
|
|
|
| 36 |
|
| 37 |
[project.optional-dependencies]
|
| 38 |
core = [
|
| 39 |
+
"bm25s[full]>=0.2.2",
|
| 40 |
"Byaldi>=0.0.5",
|
| 41 |
"firerequests>=0.0.7",
|
| 42 |
"marker-pdf>=0.2.17",
|