yukee1992 committed on
Commit
6395efd
·
verified ·
1 Parent(s): 6a274d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -18
app.py CHANGED
@@ -1,23 +1,42 @@
1
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Gated model repo — requires accepting the Gemma license on the Hub.
model_id = "google/gemma-1.1-7b-it"

# CPU-specific config
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cpu",
    torch_dtype=torch.float32,  # Required for CPU
    # FIX: removed load_in_8bit=True. bitsandbytes 8-bit quantization needs a
    # CUDA GPU and contradicts the float32 CPU setup above — with
    # device_map="cpu" the load would fail at runtime instead of saving RAM.
)
 
14
 
15
def generate(prompt):
    """Run sampled text generation for *prompt* and return the decoded string.

    Relies on the module-level ``tokenizer`` and ``model``; inference is done
    on CPU. The decoded output includes the prompt itself (standard
    ``generate`` behavior).
    """
    encoded = tokenizer(prompt, return_tensors="pt").to("cpu")
    generated_ids = model.generate(
        **encoded,
        max_new_tokens=150,  # Must stay under 200
        do_sample=True,
        temperature=0.7,
    )
    decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return decoded
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from threading import Thread
import gradio as gr
# NOTE(review): TextIteratorStreamer, Thread, and gr are not used in this
# visible chunk — presumably consumed by the streaming/UI code referenced
# below as "rest of your existing code"; confirm before removing.

# Configuration
MODEL_ID = "google/gemma-1.1-7b-it"  # gated repo; requires an authorized token
HF_TOKEN = os.getenv("HF_TOKEN")  # Will be injected from Space secrets
MAX_TOKENS = 300      # generation budget (used elsewhere in the file)
TEMPERATURE = 0.7     # sampling temperature (used elsewhere in the file)

# Authentication check: fail fast at import time with setup instructions
# rather than letting the gated-model download fail with an opaque 401.
if not HF_TOKEN:
    raise ValueError("""
    ❌ HF_TOKEN not found!
    Add it in Space Settings -> Repository secrets:
    1. Click Settings ⚙️
    2. Go to 'Variables and secrets'
    3. Add new secret: Name=HF_TOKEN, Value=your_hf_token_here
    """)
22
 
23
# Load model
def load_model():
    """Fetch the tokenizer and causal-LM weights from the Hub.

    Authenticates with the module-level HF_TOKEN (the Gemma repo is gated),
    lets ``device_map="auto"`` pick placement, and loads weights in float16.

    Returns:
        (tokenizer, model) tuple ready for generation.
    """
    print("🚀 Loading model...")
    tok = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
    lm = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        torch_dtype=torch.float16,
        token=HF_TOKEN,
    )
    print("✅ Model loaded!")
    return tok, lm
39
+
40
# Load once at import time so the (long) download/initialization happens
# before the UI starts serving requests.
tokenizer, model = load_model()

# ... [rest of your existing code remains exactly the same] ...