Update README.md
README.md CHANGED

@@ -14,11 +14,31 @@ This model understands **instruction-style prompts** for generating code in mult
 
 ---
 
+## Requirements (Colab Setup)
+
+If you are running this model on **Google Colab**, you’ll need to:
+
+1. Go to the left sidebar and click the **🔑 (Secrets)** tab.
+2. Add a new secret named `HF_TOKEN` and set the value to your **Hugging Face token** (from https://huggingface.co/settings/tokens).
+3. Enable **Notebook access** for your token.
+4. Restart the Colab session.
+
+Then log in inside the notebook:
+
+```python
+from huggingface_hub import login
+import os
+
+login(token=os.environ["HF_TOKEN"])
+```
+---
+
 ## How to Use
 
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+import re
 
 model_id = "key-life/codegen-alpaca-1b"
 
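A note on the login cell added in the first hunk: Colab exposes secrets through its `google.colab.userdata` API rather than as environment variables, so `os.environ["HF_TOKEN"]` can raise a `KeyError` even after the steps above. A minimal Colab-native sketch, assuming the `HF_TOKEN` secret exists and notebook access is enabled:

```python
from google.colab import userdata  # Colab-only helper behind the Secrets sidebar
from huggingface_hub import login

# Read the HF_TOKEN secret added in the Secrets tab; this raises if the
# secret is missing or notebook access was not granted.
login(token=userdata.get("HF_TOKEN"))
```

Recent releases of `huggingface_hub` can also pick the Colab secret up automatically, in which case a bare `login()` is enough.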
@@ -31,5 +51,26 @@ prompt = "### Instruction:\nWrite a Python function to check if a number is prim
 inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
 # Generate code
-outputs = model.generate(
-
+outputs = model.generate(
+    **inputs,
+    max_new_tokens=128,
+    temperature=0.2,  # more deterministic
+    top_p=0.9,        # avoids rambling
+    do_sample=True,
+    eos_token_id=tokenizer.eos_token_id,
+    pad_token_id=tokenizer.eos_token_id
+)
+
+# Decode
+decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+# Extract only the code block
+code_block = re.findall(r"```(?:python)?(.*?)```", decoded, re.DOTALL)
+
+if code_block:
+    response = code_block[0].strip()
+else:
+    response = decoded.split("### Response:")[-1].strip()
+
+print(response)
+```
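The hunks above elide the model and tokenizer loading plus the prompt construction (old lines 25-30, new lines 45-50), so the generation snippet is not runnable by itself. A minimal sketch of what that gap typically holds, assuming plain `transformers` loading and the Alpaca-style prompt quoted in the second hunk header; the dtype and device choices here are illustrative assumptions, not taken from the README:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "key-life/codegen-alpaca-1b"

# Standard loading; the README's actual options are hidden by the diff,
# so treat these as placeholders.
tokenizer = AutoTokenizer.from_pretrained(model_id)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

# Alpaca-style prompt, matching the "### Instruction:" text in the hunk
# header and the "### Response:" marker used by the fallback split.
prompt = (
    "### Instruction:\n"
    "Write a Python function to check if a number is prime.\n\n"
    "### Response:\n"
)
```

With those pieces in place, the post-processing added in the second hunk has two paths: `re.findall(..., re.DOTALL)` pulls out a fenced code block when the model emits one, and the `split("### Response:")[-1]` fallback strips the echoed prompt when it does not. For fully deterministic output, replacing `do_sample=True` with greedy decoding (`do_sample=False`) is the usual alternative to a low temperature.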