Initial release: Privacy Comparator LoRA

Files changed (8) hide show

.gitattributes +1 -0
README.md +145 -3
adapter_config.json +39 -0
adapter_model.safetensors +3 -0
base_model.txt +1 -0
comparator_config.json +11 -0
stats.json +1 -0
train_config.json +1 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+adapter_model.safetensors filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,145 @@
----
-license: apache-2.0
----

+---
+library_name: peft
+base_model: Qwen/Qwen2.5-7B-Instruct
+pipeline_tag: text-generation
+---
+# Privacy Comparator
+A learned model for pairwise comparison of privacy strength between messages.
+---
+## Model Details
+### Model Description
+Privacy Comparator is a learned model that compares two messages and determines which provides stronger protection of personal or sensitive information.
+Given two inputs:
+```
+A: message
+B: message
+```
+the model outputs:
+```
+A    → message A is more privacy-preserving
+B    → message B is more privacy-preserving
+SAME → comparable privacy strength
+```
+The model performs **relative privacy comparison** and can be applied to arbitrary message pairs, regardless of how they were generated.
+It does **not**:
+- detect PII
+- assign absolute privacy scores
+- generate redactions
+Instead, it learns a preference relation over messages in terms of privacy strength.
+---
+### Base Model
+Fine-tuned from: Qwen/Qwen2.5-7B-Instruct
+Implemented as a LoRA adapter.
+---
+### License
+This adapter inherits the license constraints of the base model, Qwen/Qwen2.5-7B-Instruct (Apache 2.0).
+---
+## Uses
+### Intended Use
+- Privacy-preserving text comparison
+- Ranking anonymization strategies
+- Evaluating relative disclosure risk
+For example, when multiple transformation strategies are applied to the same input:
+```
+m_i = τ(x; a_i)
+```
+where:
+- `x` is the original message
+- `a_i` is a transformation strategy (e.g., redact, abstract, retain sensitive spans)
+- `τ` applies the chosen strategy to produce a privacy-preserving version
+Example:
+Original message:
+```
+Lucy lives at 139 Tremont St in Boston.
+```
+Different strategies may produce:
+```
+m₁: [NAME1] lives at [ADDRESS1] in [CITY1].
+m₂: A person lives at a residential address in a major city in the U.S.
+m₃: A person lives at [ADDRESS1] in Boston.
+```
+The comparator can rank such variants based on which better protects sensitive information.
+For more details on the transformation framework, please refer to the associated paper.
+---
+### Out-of-Scope Use
+This model is **not intended for**:
+- PII detection
+- Safety moderation
+- Utility evaluation
+- Generating anonymized text
+It performs relative comparison only.
+---
+## Training Details
+- LoRA rank: 8
+- Learning rate: 1e-4
+- Epochs: 2
+- Context length: 2048
+- Global batch size: 2048 tokens (maximum packed tokens per training step)
+Training performed using Fireworks AI.
+---
+## Model Outputs
+The model produces structured JSON decisions, where `response` is one of `"A"`, `"B"`, or `"SAME"`:
+```json
+{
+  "reason": "...",
+  "response": "A"
+}
+```
+---
+## Resources
+Paper: [ICLR 2026 poster](https://iclr.cc/virtual/2026/poster/10007115)
+Code: [Operationalize Data Minimization](https://github.com/PEACH-Research-Lab/Operationalize-Data-Minimization)
+For full details of the transformation framework and action search procedure, please refer to the paper.

adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "up_proj",
+    "gate_proj",
+    "q_proj",
+    "o_proj",
+    "v_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bbf1e68ca93dce3d5615e2fe7d8c518fb85ad96ec684c45b7dddd248459a3a95
+size 40422208

base_model.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ Qwen/Qwen2.5-7B-Instruct

comparator_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "model_role": "privacy_comparator",
+  "task": "pairwise_privacy_ranking",
+  "input": {
+    "message_A": "text",
+    "message_B": "text"
+  },
+  "output": {
+    "response": ["A", "B", "SAME"]
+  }
+}

stats.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"world_size": 1, "epochs": 2, "steps": 4371, "seqs": 9550, "tokens": 7050446, "last_epoch_steps": 0, "last_epoch_seqs": 0, "last_epoch_tokens": 0, "experiment_tracking_run_id": null, "loss_ema": 0.7818833695804316, "loss_sum": 30.961968302726746, "mtp_loss_ema": 0, "mtp_loss_sum": 0, "eval_losses_avg": [0.9667969819826957, 0.8918106800470597, 0.844005211805686, 0.8168310828697987, 0.7929086157908807, 0.79764097623336, 0.7486214874646603, 0.7394010134232349, 0.7235642022047287, 0.7187379384652163, 0.7143815450179272, 0.7060007116733453]}

train_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"comet": false, "comet_api_key": null, "comet_workspace": null, "comet_project": null, "comet_run_id": "zlvhfkb7", "wandb": false, "wandb_api_key": null, "wandb_entity": null, "wandb_project": null, "wandb_run_id": "zlvhfkb7", "base_model_dir": "/llm-downloader-destination/base/fireworks/qwen2p5-7b-instruct/hf", "output_model_dir": "gs://fireworks-artifacts-finetunerobot-83445b/sftj-zlvhfkb7/029e24/ft-zlvhfkb7-xjty1/checkpoint", "checkpoint_dir": "/dev/shm/checkpoints", "gcs_checkpoint_dir": "gs://fireworks-fine-tuning-checkpoints/sftj-finetunerobot-zlvhfkb7-6b4cfbb3-d5e4-4d60-aaa4-1829e7847c4f/checkpoints", "max_checkpoints_to_keep": 1, "checkpoint_interval": 3600, "save_final_checkpoint": false, "train": true, "learning_rate": 0.0001, "learning_rate_warmup_steps": 0, "grad_accum_steps": 1, "epochs": 2, "early_stop": true, "seed": 42, "dataset_dir": "/mnt/staging/dataset", "eval_auto_carveout": true, "eval_dataset_dir": null, "train_limit": null, "max_context_len": 2048, "batch_size": 2048, "min_evals_per_epoch": 4, "precision": null, "status_file": "gs://fireworks-fine-tuning-job-status/sftj-finetunerobot-zlvhfkb7-6b4cfbb3-d5e4-4d60-aaa4-1829e7847c4f", "billing_file": "gs://fireworks-fine-tuning-metadata/sftj-finetunerobot-zlvhfkb7/billing-6b4cfbb3-d5e4-4d60-aaa4-1829e7847c4f", "metrics_file": "gs://fireworks-fine-tuning-metadata/sftj-finetunerobot-zlvhfkb7/metrics.jsonl", "profile": null, "weight_sharding": null, "activation_sharding": null, "empty_weights": false, "peft_addon_dir": null, "lora_rank": 8, "lora_dropout": 0.05, "template_kind": "conversation", "template": null, "eval_train_ratio": 0.02, "mtp_config": {"enable_mtp": false, "freeze_base_model": false, "num_draft_tokens": 1}, "qat": true}