traromal committed on
Commit
679274e
·
verified ·
1 Parent(s): 73f0234

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +100 -21
README.md CHANGED
@@ -1,21 +1,100 @@
1
- ---
2
- base_model: unsloth/gemma-3-270m-it
3
- tags:
4
- - text-generation-inference
5
- - transformers
6
- - unsloth
7
- - gemma3_text
8
- license: apache-2.0
9
- language:
10
- - en
11
- ---
12
-
13
- # Uploaded finetuned model
14
-
15
- - **Developed by:** traromal
16
- - **License:** apache-2.0
17
- - **Finetuned from model :** unsloth/gemma-3-270m-it
18
-
19
- This gemma3_text model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.
20
-
21
- [<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## 💡 Example Inference Code
2
+
3
+ You can try this PII Masking model directly with the following script:
4
+
5
+ ```python
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
7
+ import torch
8
+
9
+ # ----------------------------
10
+ # Load model & tokenizer
11
+ # ----------------------------
12
+ model_name = "traromal/AIccel_entity_masker_Gemma3_270m"
13
+ print(f"Loading model: {model_name}")
14
+
15
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
16
+ model = AutoModelForCausalLM.from_pretrained(
17
+ model_name,
18
+ trust_remote_code=True,
19
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
20
+ low_cpu_mem_usage=True
21
+ )
22
+
23
+ device = "cuda" if torch.cuda.is_available() else "cpu"
24
+ model = model.to(device)
25
+ print(f"✅ Model loaded on {device}")
26
+
27
+ # ----------------------------
28
+ # System prompt
29
+ # ----------------------------
30
+ SYSTEM_PROMPT = """You are a global data privacy expert.
31
+ Identify and mask all PII (Personally Identifiable Information) in text.
32
+ Replace each with an appropriate tag like [NAME], [AADHAR_NUMBER], [PHONE], etc.
33
+ Also list detected entities with their type and sensitivity level."""
34
+
35
+ # ----------------------------
36
+ # Masking function
37
+ # ----------------------------
38
+ def mask_pii(text, stream=False):
39
+ messages = [
40
+ {'role': 'system', 'content': SYSTEM_PROMPT},
41
+ {'role': 'user', 'content': f'Mask all sensitive PII in:\n\n"{text}"'}
42
+ ]
43
+
44
+ chat = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
45
+ inputs = tokenizer(chat, return_tensors="pt").to(device)
46
+
47
+ if stream:
48
+ streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
49
+ model.generate(
50
+ **inputs,
51
+ max_new_tokens=512,
52
+ temperature=0.7,
53
+ top_p=0.9,
54
+ top_k=50,
55
+ do_sample=True,
56
+ streamer=streamer,
57
+ pad_token_id=tokenizer.pad_token_id,
58
+ eos_token_id=tokenizer.eos_token_id,
59
+ )
60
+ else:
61
+ outputs = model.generate(
62
+ **inputs,
63
+ max_new_tokens=512,
64
+ temperature=0.7,
65
+ top_p=0.9,
66
+ top_k=50,
67
+ do_sample=True,
68
+ pad_token_id=tokenizer.pad_token_id,
69
+ eos_token_id=tokenizer.eos_token_id,
70
+ )
71
+ response = tokenizer.decode(outputs[0], skip_special_tokens=False)
72
+ if "<start_of_turn>model" in response:
73
+ response = response.split("<start_of_turn>model")[-1].replace("<end_of_turn>", "").strip()
74
+ print(response)
75
+ return response
76
+
77
+ # ----------------------------
78
+ # Quick examples
79
+ # ----------------------------
80
+ examples = [
81
+ "My name is Rajesh Kumar and my Aadhar number is 1234-5678-9012. Contact me at +91-9876543210.",
82
+ "Patient Priya Sharma, Blood Group: B+, UHID: MH2023-12345, DOB: 15/08/1990.",
83
+ "Please transfer ₹50,000 to account 123456789012 (IFSC: HDFC0001234). UPI ID: amit.kumar@paytm.",
84
+ "John Smith, SSN: 123-45-6789, email: john.smith@gmail.com",
85
+ ]
86
+
87
+ for text in examples:
88
+ print("\n🧩 Original:", text)
89
+ print("🔒 Masked:")
90
+ mask_pii(text, stream=True)
91
+ print("=" * 80)
92
+
93
+ # ----------------------------
94
+ # Interactive mode
95
+ # ----------------------------
96
+ while True:
97
+ user_text = input("\n🔒 Enter text to mask (or 'exit'): ").strip()
98
+ if user_text.lower() in ["exit", "quit", "q"]:
99
+ break
100
+ mask_pii(user_text, stream=True)