anaspro commited on
Commit
ad30712
·
1 Parent(s): 5b2f9a5

Update to GPT-OSS-20B model with 4-bit quantization

Browse files

- Switch from Lahja-iraqi-4B to unsloth/gpt-oss-20b-unsloth-bnb-4bit
- Add BitsAndBytesConfig for 4-bit quantization support
- Update Spaces metadata for GPT-OSS model
- Add .gitattributes for proper LFS handling
- Update README with model description and features

Files changed (4) hide show
  1. .gitattributes +2 -4
  2. README.md +14 -5
  3. app.py +17 -5
  4. config.json +4 -4
.gitattributes CHANGED
@@ -23,13 +23,11 @@
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
  *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
 
 
27
  *.tflite filter=lfs diff=lfs merge=lfs -text
28
  *.tgz filter=lfs diff=lfs merge=lfs -text
29
  *.wasm filter=lfs diff=lfs merge=lfs -text
30
  *.xz filter=lfs diff=lfs merge=lfs -text
31
  *.zip filter=lfs diff=lfs merge=lfs -text
32
  *.zst filter=lfs diff=lfs merge=lfs -text
33
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Chatbox
3
- emoji: 💬
4
- colorFrom: yellow
5
- colorTo: purple
6
  sdk: gradio
7
  sdk_version: 5.42.0
8
  app_file: app.py
@@ -10,6 +10,15 @@ pinned: false
10
  hf_oauth: true
11
  hf_oauth_scopes:
12
  - inference-api
 
 
 
 
 
 
 
13
  ---
14
 
15
- An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
 
 
 
1
  ---
2
+ title: GPT-OSS-20B Chat Assistant
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 5.42.0
8
  app_file: app.py
 
10
  hf_oauth: true
11
  hf_oauth_scopes:
12
  - inference-api
13
+ models:
14
+ - unsloth/gpt-oss-20b-unsloth-bnb-4bit
15
+ tags:
16
+ - gpt-oss
17
+ - reasoning
18
+ - chat
19
+ - arabic
20
  ---
21
 
22
+ A chatbot powered by GPT-OSS-20B (OpenAI's open-weight reasoning model) with 4-bit quantization for efficient inference. Features advanced reasoning capabilities, tool use, and support for Arabic-language conversations.
23
+
24
+ Built with [Gradio](https://gradio.app) and deployed on Hugging Face Spaces for easy access.
app.py CHANGED
@@ -5,7 +5,7 @@ import spaces
5
  import json
6
  import time
7
  from threading import Thread
8
- from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
9
  from huggingface_hub import login
10
  import logging
11
 
@@ -88,13 +88,25 @@ def load_model():
88
  if tokenizer.pad_token is None:
89
  tokenizer.pad_token = tokenizer.eos_token
90
 
 
 
 
 
 
 
 
 
 
 
 
91
  # Load model with optimized settings
92
  model = AutoModelForCausalLM.from_pretrained(
93
  MODEL_ID,
94
- torch_dtype=torch.bfloat16,
95
- device_map="auto",
96
- trust_remote_code=True,
97
- low_cpu_mem_usage=True
 
98
  )
99
 
100
  model.eval()
 
5
  import json
6
  import time
7
  from threading import Thread
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, BitsAndBytesConfig
9
  from huggingface_hub import login
10
  import logging
11
 
 
88
  if tokenizer.pad_token is None:
89
  tokenizer.pad_token = tokenizer.eos_token
90
 
91
+ # Configure 4-bit quantization
92
+ if config["model"].get("load_in_4bit", False):
93
+ quantization_config = BitsAndBytesConfig(
94
+ load_in_4bit=True,
95
+ bnb_4bit_compute_dtype=torch.float16,
96
+ bnb_4bit_use_double_quant=True,
97
+ bnb_4bit_quant_type="nf4"
98
+ )
99
+ else:
100
+ quantization_config = None
101
+
102
  # Load model with optimized settings
103
  model = AutoModelForCausalLM.from_pretrained(
104
  MODEL_ID,
105
+ torch_dtype=config["model"].get("torch_dtype", "auto"),
106
+ device_map=config["model"].get("device_map", "auto"),
107
+ trust_remote_code=config["model"].get("trust_remote_code", True),
108
+ low_cpu_mem_usage=config["model"].get("low_cpu_mem_usage", True),
109
+ quantization_config=quantization_config
110
  )
111
 
112
  model.eval()
config.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "model": {
3
- "model_id": "anaspro/Lahja-iraqi-4B",
4
- "torch_dtype": "bfloat16",
5
  "device_map": "auto",
6
  "trust_remote_code": true,
7
- "use_flash_attention": true,
8
- "low_cpu_mem_usage": true
9
  },
10
  "generation": {
11
  "max_new_tokens": 800,
 
1
  {
2
  "model": {
3
+ "model_id": "unsloth/gpt-oss-20b-unsloth-bnb-4bit",
4
+ "torch_dtype": "auto",
5
  "device_map": "auto",
6
  "trust_remote_code": true,
7
+ "low_cpu_mem_usage": true,
8
+ "load_in_4bit": true
9
  },
10
  "generation": {
11
  "max_new_tokens": 800,