dgaff
/

bsky_user_classifier

Model card Files Files and versions

dgaff committed on Nov 15, 2024

Commit

cb65d6c

·

verified ·

1 Parent(s): ac6a894

Update README.md

Files changed (1) hide show

README.md +83 -1

README.md CHANGED Viewed

@@ -2,4 +2,86 @@
 license: mit
 base_model:
 - distilbert/distilbert-base-uncased
----

 license: mit
 base_model:
 - distilbert/distilbert-base-uncased
+---
+Deepest apologies for how messy this is, but here is how to download the model files and run inference:
+```python
+import os
+import sys
+import json
+import torch
+from huggingface_hub import hf_hub_download
+import importlib.util
+# Repository ID and filenames
+repo_id = "dgaff/bsky_user_classifier"
+files_to_download = {
+    "model_weights": "multioutput_regressor.pth",
+    "train_script": "train.py",
+    "data_processing": "data_processing.py",
+    "utils": "utils.py",
+    "label_mappings": "label_mappings.json",
+}
+# Download necessary files
+model_weights_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["model_weights"])
+train_script_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["train_script"])
+data_processing_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["data_processing"])
+util_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["utils"])
+label_mappings_path = hf_hub_download(repo_id=repo_id, filename=files_to_download["label_mappings"])
+# Update sys.path to include dependencies
+for path in [data_processing_path, util_path]:
+    dir_path = os.path.dirname(path)
+    if dir_path not in sys.path:
+        sys.path.append(dir_path)
+# Load train.py as a module
+spec = importlib.util.spec_from_file_location("train_module", train_script_path)
+train_module = importlib.util.module_from_spec(spec)
+sys.modules["train_module"] = train_module
+spec.loader.exec_module(train_module)
+# Load label mappings
+with open(label_mappings_path) as f:
+    label_mappings = json.load(f)
+# Initialize the model
+hidden_size = 768  # Ensure this matches your model's configuration
+num_outputs = 23   # Update if different
+model = train_module.MultiOutputRegressor(hidden_size=hidden_size, num_outputs=num_outputs)
+# Load weights and set model to evaluation mode
+model.load_state_dict(torch.load(model_weights_path, map_location=torch.device('cpu')))
+model.eval()
+# Set device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+model.to(device)
+# Prepare input sentences and generate embeddings
+new_sentences = [
+    "This is a test sentence.",
+    "Another example of a sentence to predict."
+]
+embedder = train_module.EmbeddingGenerator()
+new_embeddings = embedder.generate_embeddings(new_sentences)
+new_embeddings_tensor = torch.tensor(new_embeddings, dtype=torch.float).to(device)
+# Generate predictions
+with torch.no_grad():
+    predictions = model(new_embeddings_tensor).cpu().numpy()
+# Map predictions to labels and print results
+for sentence, pred in zip(new_sentences, predictions):
+    label_pred_dict = {label_mappings["id2label"][str(i)]: float(pred[i]) for i in range(len(pred))}
+    print(f"Sentence: {sentence}")
+    print("Predictions:")
+    for label, value in label_pred_dict.items():
+        print(f"  {label}: {value}")
+    print()
+```
+I'll do better next time.