| import os |
| from pathlib import Path |
|
|
| import gradio as gr |
| from huggingface_hub import WebhookPayload, WebhooksServer |
|
|
| from src.my_logger import setup_logger |
| from src.utilities import load_datasets, merge_and_update_datasets |
| from src.visualize_logs import log_file_to_html_string |
| from src.build_nomic import build_nomic |
|
|
# Resolve the project root from this file's location.
# BUG FIX: the original used Path(__name__) — __name__ is the module name
# (e.g. "__main__"), not a filesystem path. __file__ is the script's path.
proj_dir = Path(__file__).parent

# Module-level logger (project helper); announce startup immediately.
logger = setup_logger(__name__)
logger.info("Starting Application...")
|
|
# Required configuration — os.environ[...] fails fast (KeyError) if unset.
SUBREDDIT = os.environ["SUBREDDIT"]
USERNAME = os.environ["USERNAME"]
# Source dataset repo id, derived from the owner and subreddit.
OG_DATASET = f"{USERNAME}/dataset-creator-reddit-{SUBREDDIT}"
PROCESSED_DATASET = os.environ["PROCESSED_DATASET"]  # quote style normalized for consistency
HUGGINGFACE_AUTH_TOKEN = os.environ["HUGGINGFACE_AUTH_TOKEN"]
# Optional — falls back to a dummy secret for local development.
WEBHOOK_SECRET = os.getenv("HF_WEBHOOK_SECRET", "secret")
|
|
# Markdown shown on the "Application" tab; fixed subject-verb agreement
# ("and get embeddings" -> "and gets embeddings") in the user-facing text.
intro_md = """
# Processing BORU
This space is triggered by a webhook for changes on
[derek-thomas/dataset-creator-reddit-bestofredditorupdates](https://huggingface.co/datasets/derek-thomas/dataset-creator-reddit-bestofredditorupdates).
It then takes the updates from that dataset and gets embeddings and puts the results in
[https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed](https://huggingface.co/datasets/derek-thomas/reddit-bestofredditorupdates-processed)

Check out the original on [Nomic](https://atlas.nomic.ai/data/derek2/boru-subreddit-neural-search/map)
"""
|
|
# Full HTML page embedding the Nomic Atlas map in a centered, rounded iframe.
# FIX: the <title> said "conll2003" — a copy-paste leftover from an unrelated
# dataset page; this page shows the BORU Nomic Atlas map.
html_str = """
<html>

<head>
    <title>Nomic Atlas</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            background-color: #f0f0f0;
            display: flex;
            justify-content: center;
            align-items: center;
            height: 100vh;
            margin: 0;
            padding: 0;
            color: #333;
        }
        .iframe-container {
            border: 1px solid #ccc;
            border-radius: 10px;
            overflow: hidden;
            width: 80%;
            height: 80%;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }
        iframe {
            width: 100%;
            height: 100%;
            border: none;
        }
    </style>
</head>

<body>
    <div class="iframe-container">
        <iframe src="https://atlas.nomic.ai/data/derek2/boru-subreddit-neural-search/map/cdd8c890-2fac-4ea6-91f8-e6821203cfcb" allow="clipboard-read; clipboard-write"
            title="Nomic Atlas"></iframe>
    </div>
</body>

</html>"""
|
|
# Two-tab Gradio UI: the embedded Nomic map, plus a live view of the app logs.
with gr.Blocks() as ui:
    with gr.Tab("Application"):
        gr.Markdown(intro_md)
        gr.HTML(html_str)
    with gr.Tab("Logs"):
        gr.Markdown("# Logs")
        # Re-renders the log file as HTML every second so the tab live-updates.
        output = gr.HTML(log_file_to_html_string, every=1)

# Wrap the UI in a WebhooksServer so webhook routes can be registered below.
# NOTE(review): .queue() appears to be needed for the `every=`-based polling
# refresh above — confirm against the Gradio version in use.
app = WebhooksServer(ui=ui.queue(), webhook_secret=WEBHOOK_SECRET)
|
|
|
|
@app.add_webhook("/dataset_repo")
async def community(payload: WebhookPayload):
    """Webhook endpoint: rebuild the processed dataset on repo-scoped events.

    Pipeline: load the processed + original datasets, merge/update the rows,
    push the result to the Hub, then rebuild the Nomic Atlas map.

    Args:
        payload: Parsed webhook payload delivered by the Hugging Face Hub.
    """
    # Guard clause: ignore anything that is not a repo-scoped event
    # (e.g. discussion/comment events).
    if not payload.event.scope.startswith("repo"):
        return
    logger.info(f"Webhook received from {payload.repo.name} indicating a repo {payload.event.action}")

    # f-prefixes removed below: these messages have no placeholders (ruff F541).
    logger.info("Loading new dataset...")
    dataset, original_dataset = load_datasets()
    logger.info("Loaded new dataset")

    logger.info("Merging and Updating row...")
    dataset = merge_and_update_datasets(dataset, original_dataset)

    logger.info("Pushing processed data to the Hugging Face Hub...")
    dataset.push_to_hub(PROCESSED_DATASET, token=HUGGINGFACE_AUTH_TOKEN)
    logger.info("Pushed processed data to the Hugging Face Hub")

    logger.info("Building Nomic...")
    build_nomic(dataset=dataset)
    logger.info("Built Nomic")
|
|
if __name__ == "__main__":
    # Serve on all interfaces at the standard Hugging Face Spaces port,
    # surfacing errors in the UI for easier debugging.
    app.launch(
        server_name="0.0.0.0",
        show_error=True,
        server_port=7860,
    )
| |
|
|