amaye15
committed on
Commit
·
10656cf
1
Parent(s):
10c2fec
webhook
Browse files
app.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
| 1 |
import os
|
| 2 |
import shutil
|
| 3 |
import logging
|
| 4 |
-
from
|
|
|
|
| 5 |
from datasets import Dataset, load_dataset, disable_caching
|
|
|
|
|
|
|
| 6 |
|
| 7 |
disable_caching()
|
| 8 |
|
|
@@ -17,9 +20,11 @@ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(messag
|
|
| 17 |
console_handler.setFormatter(formatter)
|
| 18 |
logger.addHandler(console_handler)
|
| 19 |
|
|
|
|
| 20 |
DS_NAME = "amaye15/object-segmentation"
|
| 21 |
DATA_DIR = "data"
|
| 22 |
TARGET_REPO = "amaye15/tmp"
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def get_data():
|
|
@@ -52,23 +57,70 @@ def process_and_push_data():
|
|
| 52 |
logger.info("Data processed and pushed to the hub.")
|
| 53 |
|
| 54 |
|
| 55 |
-
# Initialize the WebhooksServer
|
| 56 |
-
app = WebhooksServer(webhook_secret=
|
| 57 |
|
| 58 |
|
| 59 |
-
@
|
| 60 |
-
async def
|
|
|
|
|
|
|
| 61 |
"""
|
| 62 |
Webhook endpoint that triggers data processing when the dataset is updated.
|
| 63 |
"""
|
| 64 |
-
if
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
-
|
| 74 |
-
app.launch()
|
|
|
|
| 1 |
import os
|
| 2 |
import shutil
|
| 3 |
import logging
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from huggingface_hub import WebhooksServer, WebhookPayload
|
| 6 |
from datasets import Dataset, load_dataset, disable_caching
|
| 7 |
+
from fastapi import BackgroundTasks, Response, status
|
| 8 |
+
from huggingface_hub.utils import build_hf_headers, get_session
|
| 9 |
|
| 10 |
disable_caching()
|
| 11 |
|
|
|
|
| 20 |
console_handler.setFormatter(formatter)
|
| 21 |
logger.addHandler(console_handler)
|
| 22 |
|
| 23 |
+
# Environment variables
|
| 24 |
DS_NAME = "amaye15/object-segmentation"
|
| 25 |
DATA_DIR = "data"
|
| 26 |
TARGET_REPO = "amaye15/tmp"
|
| 27 |
+
WEBHOOK_SECRET = os.getenv("HF_WEBHOOK_SECRET", "my_secret_key")
|
| 28 |
|
| 29 |
|
| 30 |
def get_data():
|
|
|
|
| 57 |
logger.info("Data processed and pushed to the hub.")
|
| 58 |
|
| 59 |
|
| 60 |
+
# Initialize the WebhooksServer with Gradio interface (if needed)
|
| 61 |
+
app = WebhooksServer(webhook_secret=WEBHOOK_SECRET)
|
| 62 |
|
| 63 |
|
| 64 |
+
@app.add_webhook("/dataset_repo")
|
| 65 |
+
async def handle_repository_changes(
|
| 66 |
+
payload: WebhookPayload, task_queue: BackgroundTasks
|
| 67 |
+
):
|
| 68 |
"""
|
| 69 |
Webhook endpoint that triggers data processing when the dataset is updated.
|
| 70 |
"""
|
| 71 |
+
if not payload.event.scope.startswith("repo"):
|
| 72 |
+
return Response("No task scheduled", status_code=status.HTTP_200_OK)
|
| 73 |
+
|
| 74 |
+
# Only run if change is on main branch
|
| 75 |
+
try:
|
| 76 |
+
if payload.updatedRefs[0].ref != "refs/heads/main":
|
| 77 |
+
response_content = "No task scheduled: Change not on main branch"
|
| 78 |
+
logger.info(response_content)
|
| 79 |
+
return Response(response_content, status_code=status.HTTP_200_OK)
|
| 80 |
+
except Exception as e:
|
| 81 |
+
logger.error(f"Error checking branch: {str(e)}")
|
| 82 |
+
return Response("No task scheduled", status_code=status.HTTP_200_OK)
|
| 83 |
+
|
| 84 |
+
# No need to run for README only updates
|
| 85 |
+
try:
|
| 86 |
+
commit_files_url = f"{payload.repo.url.api}/compare/{payload.updatedRefs[0].oldSha}..{payload.updatedRefs[0].newSha}?raw=true"
|
| 87 |
+
response_text = (
|
| 88 |
+
get_session().get(commit_files_url, headers=build_hf_headers()).text
|
| 89 |
+
)
|
| 90 |
+
logger.info(f"Git Compare URL: {commit_files_url}")
|
| 91 |
+
|
| 92 |
+
# Splitting the output into lines
|
| 93 |
+
file_lines = response_text.split("\n")
|
| 94 |
+
|
| 95 |
+
# Filtering the lines to find file changes
|
| 96 |
+
changed_files = [line.split("\t")[-1] for line in file_lines if line.strip()]
|
| 97 |
+
logger.info(f"Changed files: {changed_files}")
|
| 98 |
+
|
| 99 |
+
# Checking if only README.md has been changed
|
| 100 |
+
if all("README.md" in file for file in changed_files):
|
| 101 |
+
response_content = "No task scheduled: It's a README only update."
|
| 102 |
+
logger.info(response_content)
|
| 103 |
+
return Response(response_content, status_code=status.HTTP_200_OK)
|
| 104 |
+
except Exception as e:
|
| 105 |
+
logger.error(f"Error checking files: {str(e)}")
|
| 106 |
+
return Response("Unexpected issue", status_code=status.HTTP_501_NOT_IMPLEMENTED)
|
| 107 |
+
|
| 108 |
+
logger.info(
|
| 109 |
+
f"Webhook received from {payload.repo.name} indicating a repo {payload.event.action}"
|
| 110 |
+
)
|
| 111 |
+
task_queue.add_task(_process_webhook)
|
| 112 |
+
return Response("Task scheduled.", status_code=status.HTTP_202_ACCEPTED)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def _process_webhook():
|
| 116 |
+
logger.info(f"Loading new dataset...")
|
| 117 |
+
dataset = load_dataset(DS_NAME)
|
| 118 |
+
logger.info(f"Loaded new dataset")
|
| 119 |
+
|
| 120 |
+
logger.info(f"Processing and updating dataset...")
|
| 121 |
+
process_and_push_data()
|
| 122 |
+
logger.info(f"Processing and updating dataset completed!")
|
| 123 |
|
| 124 |
|
| 125 |
+
if __name__ == "__main__":
|
| 126 |
+
app.launch(server_name="0.0.0.0", show_error=True, server_port=7860)
|