LPX55 commited on
Commit
ac2dcb1
·
verified ·
1 Parent(s): 8243ca4

Update app_local.py

Browse files
Files changed (1) hide show
  1. app_local.py +9 -8
app_local.py CHANGED
@@ -17,14 +17,10 @@ os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
17
  os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
18
 
19
  # Model configuration
20
- REWRITER_MODEL = "Qwen/Qwen1.5-7B-Chat" # Upgraded to 7B for better JSON handling
21
  dtype = torch.bfloat16
22
  device = "cuda" if torch.cuda.is_available() else "cpu"
23
 
24
- # Preload enhancement model at startup
25
- print("🔄 Loading prompt enhancement model...")
26
- rewriter_tokenizer = AutoTokenizer.from_pretrained(REWRITER_MODEL)
27
-
28
  # Quantization configuration
29
  bnb_config = BitsAndBytesConfig(
30
  load_in_4bit=True,
@@ -38,8 +34,12 @@ rewriter_model = AutoModelForCausalLM.from_pretrained(
38
  torch_dtype=dtype,
39
  device_map="auto",
40
  quantization_config=bnb_config,
41
- max_memory={0: "48GiB"}, # Reserve adequate memory
42
  )
 
 
 
 
 
43
  print("✅ Enhancement model loaded and ready!")
44
 
45
  SYSTEM_PROMPT_EDIT = '''
@@ -129,7 +129,7 @@ def extract_json_response(model_output: str) -> str:
129
 
130
  def polish_prompt(original_prompt: str) -> str:
131
  """Enhanced prompt rewriting using original system prompt with JSON handling"""
132
- load_rewriter()
133
 
134
  # Format as Qwen chat
135
  messages = [
@@ -151,7 +151,8 @@ def polish_prompt(original_prompt: str) -> str:
151
  max_new_tokens=256, # Reduced for better quality
152
  do_sample=True,
153
  temperature=0.5, # Less creative but more focused
154
- top_p=0.9,
 
155
  no_repeat_ngram_size=3,
156
  pad_token_id=rewriter_tokenizer.eos_token_id
157
  )
 
17
  os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
18
 
19
  # Model configuration
20
+ REWRITER_MODEL = "Qwen/Qwen1.5-4B-Chat" # Downgraded from 7B to 4B to reduce memory use while keeping JSON handling acceptable
21
  dtype = torch.bfloat16
22
  device = "cuda" if torch.cuda.is_available() else "cpu"
23
 
 
 
 
 
24
  # Quantization configuration
25
  bnb_config = BitsAndBytesConfig(
26
  load_in_4bit=True,
 
34
  torch_dtype=dtype,
35
  device_map="auto",
36
  quantization_config=bnb_config,
 
37
  )
38
+
39
+ # Preload enhancement model at startup
40
+ print("🔄 Loading prompt enhancement model...")
41
+ rewriter_tokenizer = AutoTokenizer.from_pretrained(REWRITER_MODEL)
42
+
43
  print("✅ Enhancement model loaded and ready!")
44
 
45
  SYSTEM_PROMPT_EDIT = '''
 
129
 
130
  def polish_prompt(original_prompt: str) -> str:
131
  """Enhanced prompt rewriting using original system prompt with JSON handling"""
132
+ # load_rewriter()
133
 
134
  # Format as Qwen chat
135
  messages = [
 
151
  max_new_tokens=256, # Reduced for better quality
152
  do_sample=True,
153
  temperature=0.5, # Less creative but more focused
154
+ top_p=0.8,
155
+ repetition_penalty=1.1,
156
  no_repeat_ngram_size=3,
157
  pad_token_id=rewriter_tokenizer.eos_token_id
158
  )