llaa33219 committed
Commit dd51a85 · verified · Parent: 72ad230

Upload 3 files
Files changed (2)
1. app.py +15 -4
2. requirements.txt +1 -0
app.py CHANGED

@@ -1,11 +1,12 @@
 import gradio as gr
+import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 
 model_cache = {}
 
-def load_model_with_extension(model_id, extension_method, new_context_length, rope_type, rope_factor):
-    cache_key = f"{model_id}_{extension_method}_{new_context_length}_{rope_type}_{rope_factor}"
+def load_model_with_extension(model_id, extension_method, new_context_length, rope_type, rope_factor, device="cuda"):
+    cache_key = f"{model_id}_{extension_method}_{new_context_length}_{rope_type}_{rope_factor}_{device}"
 
     if cache_key in model_cache:
         return model_cache[cache_key]
@@ -31,7 +32,16 @@ def load_model_with_extension(model_id, extension_method, new_context_length, ro
     config.rope_scaling = {"type": "yarn", "factor": rope_factor, "original_max_position_embeddings": original_context}
     config.rope_theta = original_theta
 
-    model = AutoModelForCausalLM.from_pretrained(model_id, config=config, torch_dtype=torch.float32, device_map="cpu", low_cpu_mem_usage=True, trust_remote_code=True)
+    torch_dtype = torch.float16 if device == "cuda" else torch.float32
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        config=config,
+        torch_dtype=torch_dtype,
+        device_map=device,
+        low_cpu_mem_usage=True,
+        trust_remote_code=True
+    )
     model.eval()
 
     result = {"model": model, "tokenizer": tokenizer, "original_context": original_context, "applied_context": new_context_length}
@@ -39,6 +49,7 @@ def load_model_with_extension(model_id, extension_method, new_context_length, ro
     return result
 
 
+@spaces.GPU(duration=120)
 def generate(model_id, extension_method, new_context_length, rope_type, rope_factor, prompt, max_new_tokens, temperature, top_p):
     if not model_id.strip():
         return "Error: Please enter a model ID"
@@ -54,7 +65,7 @@ def generate(model_id, extension_method, new_context_length, rope_type, rope_fac
     tokenizer = model_data["tokenizer"]
 
     try:
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=False, padding=False)
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
         with torch.no_grad():
             outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample=temperature > 0, pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id)
         generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
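
The substantive change is the ZeroGPU pattern: the heavy work moves into a function decorated with `@spaces.GPU`, which attaches a GPU only for the duration of the call. Below is a minimal, self-contained sketch of that pattern under the same assumptions as the commit (a Hugging Face Space with the `spaces` package installed); `MODEL_ID` and `run_generation` are illustrative names, not part of the commit.

```python
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "gpt2"  # illustrative; the real app takes the model ID from the UI

@spaces.GPU(duration=120)  # a GPU is held for at most 120 s per call
def run_generation(prompt: str) -> str:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # Mirror the diff: half precision on GPU, full precision on a CPU fallback.
    on_gpu = torch.cuda.is_available()
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16 if on_gpu else torch.float32,
        device_map="cuda" if on_gpu else "cpu",
        low_cpu_mem_usage=True,
    )
    model.eval()
    # Inputs must live on the same device as the model, hence `.to(model.device)`.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=64)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
```

The `spaces` documentation describes the decorator as having no effect outside a ZeroGPU environment, so the CPU branch above should keep the sketch runnable locally as well.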
requirements.txt CHANGED

@@ -1,3 +1,4 @@
 transformers>=4.35.0
 accelerate>=0.25.0
 huggingface-hub>=0.36.0,<1.0.0
+spaces>=0.0.15
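
Stepping back to app.py's loader: the YaRN `factor` it writes into `config.rope_scaling` is just the ratio of the requested context window to the model's native one. A minimal sketch, assuming the standard `max_position_embeddings` config field; the `yarn_factor` helper is hypothetical and not part of the commit.

```python
from transformers import AutoConfig

def yarn_factor(model_id: str, new_context_length: int) -> float:
    # Hypothetical helper: RoPE scaling stretches positions by
    # target_window / native_window.
    config = AutoConfig.from_pretrained(model_id)
    original = getattr(config, "max_position_embeddings", 4096)
    return new_context_length / original

# Example: extending a 4096-token model to 16384 tokens gives a factor of 4.0.
```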