SFM2001 committed
Commit: 8a2aebb
Parent: b92108d

try optimize

__pycache__/create_app.cpython-312.pyc CHANGED
Binary files a/__pycache__/create_app.cpython-312.pyc and b/__pycache__/create_app.cpython-312.pyc differ
 
create_app.py CHANGED
@@ -31,7 +31,7 @@ def load_models():
     model_name = 'Qwen/Qwen3-1.7B'
     QWEN_TOKENIZER = AutoTokenizer.from_pretrained(model_name, device='auto')
     QWEN_TOKENIZER.pad_token_id = QWEN_TOKENIZER.eos_token_id
-    QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16).half()
+    QWEN_MODEL = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="auto", load_in_8bit=True, torch_dtype=torch.float16).half()
     QWEN_MODEL = QWEN_MODEL.to(device)
     MODELS_LOADED = True
 
 
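For context on this hunk: the new loader stacks load_in_8bit=True with a .half() cast and a later QWEN_MODEL.to(device), and recent transformers releases refuse both calls on a bitsandbytes-quantized model, since the loader already places and types the weights. A minimal sketch of the 8-bit load this hunk seems to be aiming for, using BitsAndBytesConfig (the current spelling of the deprecated load_in_8bit kwarg); the version assumptions are noted in the comments:

# A minimal sketch, not the committed code: 8-bit loading via
# BitsAndBytesConfig (assumes transformers >= 4.30 and a working
# bitsandbytes install).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = 'Qwen/Qwen3-1.7B'

# A tokenizer has no device; the device='auto' kwarg above is silently ignored.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token_id = tokenizer.eos_token_id

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",  # bitsandbytes places the quantized weights itself
)
# No .half() and no .to(device) afterwards: both raise on an 8-bit model.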
inference/__pycache__/infer_single.cpython-312.pyc CHANGED
Binary files a/inference/__pycache__/infer_single.cpython-312.pyc and b/inference/__pycache__/infer_single.cpython-312.pyc differ
 
inference/infer_single.py CHANGED
@@ -1,7 +1,9 @@
 from utils.data_utils import *
 from utils.prompts import *
 import torch
+from torch.cuda.amp import autocast
 from create_app import *
+from transformers import GenerationConfig
 
 def replace_single_newlines(text):
     return re.sub(r'(?<!\n)\n(?!\n)', '\\\\n\\\\n', text)
@@ -16,15 +18,21 @@ def generate_full_prompt(topic, essay, cefr_stat):
 
 
 def generate_and_score_essay(topic, essay):
-    global MODELS_LOADED, LONGFORMER_TOKENIZER, LONGFORMER_MODEL, QWEN_TOKENIZER, QWEN_MODEL
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    LONGFORMER_MODEL = LONGFORMER_MODEL.to(device)
-    QWEN_MODEL = QWEN_MODEL.to(device)
-
+    global MODELS_LOADED, LONGFORMER_TOKENIZER, LONGFORMER_MODEL, QWEN_TOKENIZER, QWEN_MODEL
     cefr_results = get_cefr_stats(essay)
     full_prompt = generate_full_prompt(topic=topic, essay=essay, cefr_stat=cefr_results)
     essay = replace_single_newlines(essay)
     paragraph_cnt = len(essay.replace('\\n\\n', '\\n').split('\\n'))
+    gen_config = GenerationConfig(
+        max_new_tokens=512,  # cut way down from 1500
+        do_sample=True,
+        top_k=50,
+        top_p=0.9,
+        temperature=0.7,
+        eos_token_id=QWEN_TOKENIZER.eos_token_id,
+        pad_token_id=QWEN_TOKENIZER.eos_token_id,
+    )
     text = QWEN_TOKENIZER.apply_chat_template(
         [{"role": "user", "content": full_prompt}],
         tokenize=False,
@@ -38,12 +46,12 @@ def generate_and_score_essay(topic, essay):
         truncation=True,
         padding_side='left'
     ).to(device)
-    with torch.inference_mode():
+    with torch.no_grad(), autocast(device_type='cuda', dtype=torch.float16):
         outputs = QWEN_MODEL.generate(
             **inputs,
-            max_new_tokens=1500,
-            use_cache=True,
-            pad_token_id=QWEN_TOKENIZER.eos_token_id
+            generation_config=gen_config,
+            use_cache=True,
+            return_dict_in_generate=False,
         )
     generated_ids = outputs[0][inputs.input_ids.shape[1]:]
     full_feedback = QWEN_TOKENIZER.decode(
@@ -78,7 +86,7 @@ def generate_and_score_essay(topic, essay):
         padding=True
     ).to(device)
     LONGFORMER_MODEL.eval()
-    with torch.no_grad():
+    with torch.no_grad(), autocast(device_type='cuda', dtype=torch.float16):
         outputs = LONGFORMER_MODEL(**score_inputs)  # Get full outputs dictionary
     scores = outputs['logits'].cpu().numpy()
     scores = [round(x) for x in scores[0]]
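One catch in the new imports: torch.cuda.amp.autocast takes no device_type argument; that keyword belongs to torch.amp.autocast, so the committed autocast(device_type='cuda', ...) calls would raise a TypeError at runtime. A minimal sketch of the intended generation path with the import adjusted; the generate_feedback helper name and the add_generation_prompt flag are illustrative assumptions, not the repo's code:

# A minimal sketch, assuming torch.amp.autocast (which accepts device_type)
# in place of torch.cuda.amp.autocast (which does not).
import torch
from torch.amp import autocast
from transformers import GenerationConfig

def generate_feedback(model, tokenizer, full_prompt, device):  # hypothetical helper
    gen_config = GenerationConfig(
        max_new_tokens=512,                   # capped down from 1500
        do_sample=True,
        top_k=50,
        top_p=0.9,
        temperature=0.7,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,  # reuse EOS as pad
    )
    text = tokenizer.apply_chat_template(
        [{"role": "user", "content": full_prompt}],
        tokenize=False,
        add_generation_prompt=True,           # assumed; the usual chat setup
    )
    inputs = tokenizer(text, return_tensors="pt", truncation=True,
                       padding_side='left').to(device)
    # enabled=... lets the same code run on CPU, where CUDA autocast is a no-op.
    with torch.no_grad(), autocast('cuda', dtype=torch.float16,
                                   enabled=(device.type == 'cuda')):
        outputs = model.generate(**inputs, generation_config=gen_config,
                                 use_cache=True)
    generated_ids = outputs[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)

With the model already loaded in 8-bit, the autocast wrapper is arguably redundant for the generate call itself; the bigger latency win in this commit is the max_new_tokens cap.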
instance/users.db CHANGED
Binary files a/instance/users.db and b/instance/users.db differ
 
requirements.txt CHANGED
@@ -10,4 +10,5 @@ flask==3.1.1
 flask_login==0.6.3
 werkzeug==3.1.3
 flask_sqlalchemy==3.1.1
-gunicorn
+gunicorn
+bitsandbytes-0.42.0
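A note on the new pin: pip's requirement syntax is name==version, so bitsandbytes-0.42.0 would be resolved as a package literally named "bitsandbytes-0.42.0" and fail to install; the line was presumably meant as bitsandbytes==0.42.0. A quick post-install check, assuming that fix:

# Sanity check that the quantization backend resolved (assumes the pin was
# meant as bitsandbytes==0.42.0).
import bitsandbytes as bnb
print(bnb.__version__)  # expect '0.42.0'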
views/__pycache__/auth.cpython-312.pyc CHANGED
Binary files a/views/__pycache__/auth.cpython-312.pyc and b/views/__pycache__/auth.cpython-312.pyc differ
 
views/__pycache__/infer.cpython-312.pyc CHANGED
Binary files a/views/__pycache__/infer.cpython-312.pyc and b/views/__pycache__/infer.cpython-312.pyc differ