Spaces:
Build error
Build error
Commit
·
b33ec72
1
Parent(s):
b1d30e1
try embed
Browse files
app.py
CHANGED
|
@@ -2,6 +2,8 @@ import gradio as gr
|
|
| 2 |
import spaces
|
| 3 |
import torch
|
| 4 |
import vdf_io
|
|
|
|
|
|
|
| 5 |
|
| 6 |
zero = torch.Tensor([0]).cuda()
|
| 7 |
print(zero.device) # <-- 'cpu' 🤔
|
|
@@ -15,24 +17,28 @@ def greet(n):
|
|
| 15 |
return f"Hello {zero + n} Tensor"
|
| 16 |
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
|
| 26 |
|
| 27 |
-
def reembed_main():
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
| 30 |
|
| 31 |
|
| 32 |
-
def download_dataset():
|
| 33 |
import datasets
|
| 34 |
|
| 35 |
-
|
|
|
|
|
|
|
| 36 |
|
| 37 |
|
| 38 |
demo = gr.Interface(
|
|
|
|
| 2 |
import spaces
|
| 3 |
import torch
|
| 4 |
import vdf_io
|
| 5 |
+
from sentence_transformers import SentenceTransformer
|
| 6 |
+
from rich import print as rprint
|
| 7 |
|
| 8 |
zero = torch.Tensor([0]).cuda()
|
| 9 |
print(zero.device) # <-- 'cpu' 🤔
|
|
|
|
| 17 |
return f"Hello {zero + n} Tensor"
|
| 18 |
|
| 19 |
|
| 20 |
+
@spaces.GPU
def reembed_dataset(ds, model):
    """Embed the ``text`` column of *ds* with a SentenceTransformer model.

    Args:
        ds: Dataset object to embed, either a single dataset or a dict of
            splits. Every split must expose a ``text`` column.
        model: Model name or path handed to ``SentenceTransformer``.

    Returns:
        The dataset produced by ``ds.map`` with an added ``embedding`` column.
    """
    # Keep the model identifier and the loaded model under separate names.
    encoder = SentenceTransformer(model, device=zero.device)
    rprint(encoder)
    rprint(encoder.encode("Hello, World!"))

    # ``map`` returns a new dataset rather than mutating ``ds``, and the mapped
    # function has to return a dict of columns, so keep the result and name
    # the new column explicitly. Batching lets the encoder process many rows
    # per call instead of one.
    embedded = ds.map(
        lambda batch: {"embedding": encoder.encode(batch["text"])},
        batched=True,
    )

    # A dict of splits cannot be indexed with ``[0]``; preview the first row
    # of the first split instead, and skip the preview when there is no row.
    splits = list(embedded.values()) if isinstance(embedded, dict) else [embedded]
    if splits and len(splits[0]) > 0:
        rprint(splits[0][0])

    return embedded
|
| 27 |
|
| 28 |
|
| 29 |
+
def reembed_main(dataset_name, embedding_model, output_username):
    """Download *dataset_name*, re-embed it, and report success.

    Args:
        dataset_name: Identifier passed to ``download_dataset``.
        embedding_model: Model identifier forwarded to ``reembed_dataset``.
        output_username: Only echoed in the log line; not otherwise used here.

    Returns:
        A fixed status message once both steps have completed.
    """
    print(f"{dataset_name=}, {embedding_model=}, {output_username=}")
    # Fetch and embed in one step; the embedding result is not needed here.
    reembed_dataset(download_dataset(dataset_name), model=embedding_model)
    return "Dataset re-embedded successfully"
|
| 34 |
|
| 35 |
|
| 36 |
+
def download_dataset(dataset_name):
    """Load *dataset_name* via ``datasets.load_dataset`` and return it.

    Prints ``len()`` of the loaded object before returning.
    NOTE(review): with no ``split`` argument this is presumably the number
    of splits rather than the number of rows; confirm.
    """
    # Imported inside the function, as in the original, so ``datasets`` is
    # only loaded when a download is actually requested.
    import datasets

    loaded = datasets.load_dataset(dataset_name)
    size = len(loaded)
    print(size)
    return loaded
|
| 42 |
|
| 43 |
|
| 44 |
demo = gr.Interface(
|