Richard Guo
committed on
Commit
·
1779f92
1
Parent(s):
036b5da
nomic login
Browse files
- Dockerfile +3 -3
- build_map.py +11 -6
- main.py +20 -18
- templates/form.html +3 -0
Dockerfile
CHANGED
|
@@ -26,9 +26,9 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 26 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
| 27 |
COPY --chown=user . $HOME/app
|
| 28 |
|
| 29 |
-
# Expose the secret NOMIC_API_KEY at buildtime and use its value
|
| 30 |
-
RUN --mount=type=secret,id=NOMIC_API_KEY,mode=0444,required=true \
|
| 31 |
-
|
| 32 |
|
| 33 |
# Make port 7860 available to the world outside this container
|
| 34 |
EXPOSE 7860
|
|
|
|
| 26 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
| 27 |
COPY --chown=user . $HOME/app
|
| 28 |
|
| 29 |
+
# # Expose the secret NOMIC_API_KEY at buildtime and use its value
|
| 30 |
+
# RUN --mount=type=secret,id=NOMIC_API_KEY,mode=0444,required=true \
|
| 31 |
+
# nomic login $(cat /run/secrets/NOMIC_API_KEY)
|
| 32 |
|
| 33 |
# Make port 7860 available to the world outside this container
|
| 34 |
EXPOSE 7860
|
build_map.py
CHANGED
|
@@ -110,6 +110,7 @@ def load_dataset_and_metadata(dataset_name,
|
|
| 110 |
|
| 111 |
|
| 112 |
def upload_dataset_to_atlas(dataset_dict,
|
|
|
|
| 113 |
project_name = None,
|
| 114 |
unique_id_field_name=None,
|
| 115 |
indexed_field = None,
|
|
@@ -117,6 +118,7 @@ def upload_dataset_to_atlas(dataset_dict,
|
|
| 117 |
organization_name=None,
|
| 118 |
wait_for_map=True,
|
| 119 |
datum_limit=30000):
|
|
|
|
| 120 |
|
| 121 |
if modality is None:
|
| 122 |
modality = "text"
|
|
@@ -140,15 +142,18 @@ def upload_dataset_to_atlas(dataset_dict,
|
|
| 140 |
uncategorized_fields = get_datum_fields(dataset_dict)
|
| 141 |
|
| 142 |
|
| 143 |
-
# return longest string field
|
| 144 |
if indexed_field is None:
|
| 145 |
-
|
| 146 |
-
longest_len = 0
|
| 147 |
for field in string_fields:
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
indexed_field = field
|
| 150 |
-
longest_len = len(ex[field])
|
| 151 |
-
|
| 152 |
|
| 153 |
topic_label_field = None
|
| 154 |
if modality == "embedding":
|
|
|
|
| 110 |
|
| 111 |
|
| 112 |
def upload_dataset_to_atlas(dataset_dict,
|
| 113 |
+
atlas_api_token: str,
|
| 114 |
project_name = None,
|
| 115 |
unique_id_field_name=None,
|
| 116 |
indexed_field = None,
|
|
|
|
| 118 |
organization_name=None,
|
| 119 |
wait_for_map=True,
|
| 120 |
datum_limit=30000):
|
| 121 |
+
nomic.login(atlas_api_token)
|
| 122 |
|
| 123 |
if modality is None:
|
| 124 |
modality = "text"
|
|
|
|
| 142 |
uncategorized_fields = get_datum_fields(dataset_dict)
|
| 143 |
|
| 144 |
|
| 145 |
+
# return longest string field from 5 samples
|
| 146 |
if indexed_field is None:
|
| 147 |
+
longest_length = 0
|
|
|
|
| 148 |
for field in string_fields:
|
| 149 |
+
length = 0
|
| 150 |
+
for i in range(len(dataset_dict["head"])):
|
| 151 |
+
ex = dataset_dict["head"].take([i])
|
| 152 |
+
if ex[field]:
|
| 153 |
+
length += len(ex[field].split())
|
| 154 |
+
if length > longest_length:
|
| 155 |
+
longest_length = length
|
| 156 |
indexed_field = field
|
|
|
|
|
|
|
| 157 |
|
| 158 |
topic_label_field = None
|
| 159 |
if modality == "embedding":
|
main.py
CHANGED
|
@@ -12,7 +12,7 @@ from huggingface_hub import create_discussion, comment_discussion
|
|
| 12 |
from build_map import load_dataset_and_metadata, upload_dataset_to_atlas
|
| 13 |
from models import WebhookPayload
|
| 14 |
|
| 15 |
-
WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
|
| 16 |
HUGGINGFACE_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
|
| 17 |
|
| 18 |
app = FastAPI()
|
|
@@ -20,12 +20,13 @@ app = FastAPI()
|
|
| 20 |
tasks = {}
|
| 21 |
templates = Jinja2Templates(directory="templates")
|
| 22 |
|
| 23 |
-
def upload_atlas_task(task_id,
|
| 24 |
-
dataset_name,
|
|
|
|
| 25 |
webhook_payload: WebhookPayload = None,
|
| 26 |
webhook_notify: bool = False):
|
| 27 |
dataset_dict = load_dataset_and_metadata(dataset_name)
|
| 28 |
-
map_url = upload_dataset_to_atlas(dataset_dict)
|
| 29 |
tasks[task_id]['status'] = 'done'
|
| 30 |
tasks[task_id]['url'] = map_url
|
| 31 |
tasks[task_id]['finish_time'] = time.time()
|
|
@@ -61,18 +62,18 @@ async def cleanup_tasks():
|
|
| 61 |
del tasks[task_id]
|
| 62 |
await asyncio.sleep(1800) # Wait for 30 minutes
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
#
|
| 67 |
-
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
#
|
| 74 |
-
|
| 75 |
-
|
| 76 |
|
| 77 |
@app.get("/status/{task_id}")
|
| 78 |
async def read_task(task_id: str):
|
|
@@ -85,8 +86,9 @@ async def read_task(task_id: str):
|
|
| 85 |
async def post_webhook(background_tasks: BackgroundTasks, payload: WebhookPayload, x_webhook_secret: Optional[str] = Header(default=None)):
|
| 86 |
if x_webhook_secret is None:
|
| 87 |
raise HTTPException(401)
|
| 88 |
-
|
| 89 |
-
|
|
|
|
| 90 |
|
| 91 |
if not (
|
| 92 |
payload.event.action == "update"
|
|
@@ -98,5 +100,5 @@ async def post_webhook(background_tasks: BackgroundTasks, payload: WebhookPayloa
|
|
| 98 |
task_id = str(uuid4())
|
| 99 |
tasks[task_id] = {'status': 'running'}
|
| 100 |
#form_data = DatasetForm(dataset_name=dataset_name)
|
| 101 |
-
background_tasks.add_task(upload_atlas_task, task_id, payload.repo.name, payload, True)
|
| 102 |
return {'task_id': task_id}
|
|
|
|
| 12 |
from build_map import load_dataset_and_metadata, upload_dataset_to_atlas
|
| 13 |
from models import WebhookPayload
|
| 14 |
|
| 15 |
+
# WEBHOOK_SECRET = os.environ.get("WEBHOOK_SECRET")
|
| 16 |
HUGGINGFACE_ACCESS_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
|
| 17 |
|
| 18 |
app = FastAPI()
|
|
|
|
| 20 |
tasks = {}
|
| 21 |
templates = Jinja2Templates(directory="templates")
|
| 22 |
|
| 23 |
+
def upload_atlas_task(task_id: str,
|
| 24 |
+
dataset_name: str,
|
| 25 |
+
atlas_api_token: str,
|
| 26 |
webhook_payload: WebhookPayload = None,
|
| 27 |
webhook_notify: bool = False):
|
| 28 |
dataset_dict = load_dataset_and_metadata(dataset_name)
|
| 29 |
+
map_url = upload_dataset_to_atlas(dataset_dict, atlas_api_token)
|
| 30 |
tasks[task_id]['status'] = 'done'
|
| 31 |
tasks[task_id]['url'] = map_url
|
| 32 |
tasks[task_id]['finish_time'] = time.time()
|
|
|
|
| 62 |
del tasks[task_id]
|
| 63 |
await asyncio.sleep(1800) # Wait for 30 minutes
|
| 64 |
|
| 65 |
+
@app.get("/", response_class=HTMLResponse)
|
| 66 |
+
async def read_form(request: Request):
|
| 67 |
+
# Render the form.html template
|
| 68 |
+
return templates.TemplateResponse("form.html", {"request": request})
|
| 69 |
|
| 70 |
+
@app.post("/submit_form")
|
| 71 |
+
async def form_post(background_tasks: BackgroundTasks, dataset_name: str = Form(...), atlas_api_token: str = Form(...)):
|
| 72 |
+
task_id = str(uuid4())
|
| 73 |
+
tasks[task_id] = {'status': 'running'}
|
| 74 |
+
#form_data = DatasetForm(dataset_name=dataset_name)
|
| 75 |
+
background_tasks.add_task(upload_atlas_task, task_id, dataset_name, atlas_api_token)
|
| 76 |
+
return {'task_id': task_id}
|
| 77 |
|
| 78 |
@app.get("/status/{task_id}")
|
| 79 |
async def read_task(task_id: str):
|
|
|
|
| 86 |
async def post_webhook(background_tasks: BackgroundTasks, payload: WebhookPayload, x_webhook_secret: Optional[str] = Header(default=None)):
|
| 87 |
if x_webhook_secret is None:
|
| 88 |
raise HTTPException(401)
|
| 89 |
+
|
| 90 |
+
# if x_webhook_secret != WEBHOOK_SECRET:
|
| 91 |
+
# raise HTTPException(403)
|
| 92 |
|
| 93 |
if not (
|
| 94 |
payload.event.action == "update"
|
|
|
|
| 100 |
task_id = str(uuid4())
|
| 101 |
tasks[task_id] = {'status': 'running'}
|
| 102 |
#form_data = DatasetForm(dataset_name=dataset_name)
|
| 103 |
+
background_tasks.add_task(upload_atlas_task, task_id, payload.repo.name, x_webhook_secret, payload, True)
|
| 104 |
return {'task_id': task_id}
|
templates/form.html
CHANGED
|
@@ -58,6 +58,9 @@
|
|
| 58 |
<div class="form-group">
|
| 59 |
<label for="dataset_name">Dataset Name:</label>
|
| 60 |
<input type="text" class="form-control" id="dataset_name" name="dataset_name">
|
|
|
|
|
|
|
|
|
|
| 61 |
</div>
|
| 62 |
<button type="submit" class="btn btn-primary">Submit</button>
|
| 63 |
</form>
|
|
|
|
| 58 |
<div class="form-group">
|
| 59 |
<label for="dataset_name">Dataset Name:</label>
|
| 60 |
<input type="text" class="form-control" id="dataset_name" name="dataset_name">
|
| 61 |
+
|
| 62 |
+
<label for="atlas_api_token">Atlas API Token:</label>
|
| 63 |
+
<input type="text" class="form-control" id="atlas_api_token" name="atlas_api_token">
|
| 64 |
</div>
|
| 65 |
<button type="submit" class="btn btn-primary">Submit</button>
|
| 66 |
</form>
|