Spaces: Running on Zero
Update app_local.py
Browse files — app_local.py (+9 −8)
app_local.py
CHANGED
|
@@ -17,14 +17,10 @@ os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
|
|
| 17 |
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
|
| 18 |
|
| 19 |
# Model configuration
|
| 20 |
-
REWRITER_MODEL = "Qwen/Qwen1.5-
|
| 21 |
dtype = torch.bfloat16
|
| 22 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 23 |
|
| 24 |
-
# Preload enhancement model at startup
|
| 25 |
-
print("🔄 Loading prompt enhancement model...")
|
| 26 |
-
rewriter_tokenizer = AutoTokenizer.from_pretrained(REWRITER_MODEL)
|
| 27 |
-
|
| 28 |
# Quantization configuration
|
| 29 |
bnb_config = BitsAndBytesConfig(
|
| 30 |
load_in_4bit=True,
|
|
@@ -38,8 +34,12 @@ rewriter_model = AutoModelForCausalLM.from_pretrained(
|
|
| 38 |
torch_dtype=dtype,
|
| 39 |
device_map="auto",
|
| 40 |
quantization_config=bnb_config,
|
| 41 |
-
max_memory={0: "48GiB"}, # Reserve adequate memory
|
| 42 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
print("✅ Enhancement model loaded and ready!")
|
| 44 |
|
| 45 |
SYSTEM_PROMPT_EDIT = '''
|
|
@@ -129,7 +129,7 @@ def extract_json_response(model_output: str) -> str:
|
|
| 129 |
|
| 130 |
def polish_prompt(original_prompt: str) -> str:
|
| 131 |
"""Enhanced prompt rewriting using original system prompt with JSON handling"""
|
| 132 |
-
load_rewriter()
|
| 133 |
|
| 134 |
# Format as Qwen chat
|
| 135 |
messages = [
|
|
@@ -151,7 +151,8 @@ def polish_prompt(original_prompt: str) -> str:
|
|
| 151 |
max_new_tokens=256, # Reduced for better quality
|
| 152 |
do_sample=True,
|
| 153 |
temperature=0.5, # Less creative but more focused
|
| 154 |
-
top_p=0.
|
|
|
|
| 155 |
no_repeat_ngram_size=3,
|
| 156 |
pad_token_id=rewriter_tokenizer.eos_token_id
|
| 157 |
)
|
|
|
|
| 17 |
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
|
| 18 |
|
| 19 |
# Model configuration
|
| 20 |
+
REWRITER_MODEL = "Qwen/Qwen1.5-4B-Chat" # Upgraded to 4B for better JSON handling
|
| 21 |
dtype = torch.bfloat16
|
| 22 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# Quantization configuration
|
| 25 |
bnb_config = BitsAndBytesConfig(
|
| 26 |
load_in_4bit=True,
|
|
|
|
| 34 |
torch_dtype=dtype,
|
| 35 |
device_map="auto",
|
| 36 |
quantization_config=bnb_config,
|
|
|
|
| 37 |
)
|
| 38 |
+
|
| 39 |
+
# Preload enhancement model at startup
|
| 40 |
+
print("🔄 Loading prompt enhancement model...")
|
| 41 |
+
rewriter_tokenizer = AutoTokenizer.from_pretrained(REWRITER_MODEL)
|
| 42 |
+
|
| 43 |
print("✅ Enhancement model loaded and ready!")
|
| 44 |
|
| 45 |
SYSTEM_PROMPT_EDIT = '''
|
|
|
|
| 129 |
|
| 130 |
def polish_prompt(original_prompt: str) -> str:
|
| 131 |
"""Enhanced prompt rewriting using original system prompt with JSON handling"""
|
| 132 |
+
# load_rewriter()
|
| 133 |
|
| 134 |
# Format as Qwen chat
|
| 135 |
messages = [
|
|
|
|
| 151 |
max_new_tokens=256, # Reduced for better quality
|
| 152 |
do_sample=True,
|
| 153 |
temperature=0.5, # Less creative but more focused
|
| 154 |
+
top_p=0.8,
|
| 155 |
+
repetition_penalty= 1.1,
|
| 156 |
no_repeat_ngram_size=3,
|
| 157 |
pad_token_id=rewriter_tokenizer.eos_token_id
|
| 158 |
)
|