derek-thomas
committed on
Commit
·
d9a1859
1
Parent(s):
65f2fab
Updating Embeddings space
Browse files
- src/build_nomic.py +16 -0
src/build_nomic.py
CHANGED
|
@@ -4,10 +4,14 @@ import pandas as pd
|
|
| 4 |
|
| 5 |
import nomic
|
| 6 |
from nomic import atlas
|
|
|
|
| 7 |
import numpy as np
|
| 8 |
|
|
|
|
|
|
|
| 9 |
NOMIC_KEY = os.getenv('NOMIC_KEY')
|
| 10 |
nomic.login(NOMIC_KEY)
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
def count_words(text):
|
|
@@ -37,9 +41,21 @@ def build_nomic(dataset):
|
|
| 37 |
|
| 38 |
df['word_count'] = df['content'].apply(count_words)
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
# Create Atlas project
|
|
|
|
| 41 |
project = atlas.map_data(embeddings=np.stack(df['embedding'].values),
|
| 42 |
data=df[non_embedding_columns].to_dict(orient='records'),
|
| 43 |
id_field='id',
|
| 44 |
identifier='BORU Subreddit Neural Search',
|
| 45 |
)
|
|
|
|
|
|
| 4 |
|
| 5 |
import nomic
|
| 6 |
from nomic import atlas
|
| 7 |
+
from nomic.dataset import AtlasClass
|
| 8 |
import numpy as np
|
| 9 |
|
| 10 |
+
from src.my_logger import setup_logger
|
| 11 |
+
|
| 12 |
NOMIC_KEY = os.getenv('NOMIC_KEY')
|
| 13 |
nomic.login(NOMIC_KEY)
|
| 14 |
+
logger = setup_logger(__name__)
|
| 15 |
|
| 16 |
|
| 17 |
def count_words(text):
|
|
|
|
| 41 |
|
| 42 |
df['word_count'] = df['content'].apply(count_words)
|
| 43 |
|
| 44 |
+
|
| 45 |
+
logger.info(f"Trying to delete old version of nomic Atlas...")
|
| 46 |
+
try:
|
| 47 |
+
ac = AtlasClass()
|
| 48 |
+
atlas_id = ac._get_dataset_by_slug_identifier("derek2/boru-subreddit-neural-search")['id']
|
| 49 |
+
ac._delete_project_by_id(atlas_id)
|
| 50 |
+
logger.info(f"Succeeded in deleting old version of nomic Atlas.")
|
| 51 |
+
except:
|
| 52 |
+
logger.info(f"Failed to delete old version of nomic Atlas.")
|
| 53 |
+
|
| 54 |
# Create Atlas project
|
| 55 |
+
logger.info(f"Trying to create new version of Atlas...")
|
| 56 |
project = atlas.map_data(embeddings=np.stack(df['embedding'].values),
|
| 57 |
data=df[non_embedding_columns].to_dict(orient='records'),
|
| 58 |
id_field='id',
|
| 59 |
identifier='BORU Subreddit Neural Search',
|
| 60 |
)
|
| 61 |
+
logger.info(f"Succeeded in creating new version of nomic Atlas.")
|