Spaces: Build error

Update app.py

app.py CHANGED
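The change rewrites inference() to build its prompt with the tokenizer's chat template instead of splicing raw token chunks around the -200 image-placeholder id, and passes the processed image to generate() explicitly as pixel_values.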
@@ -14,6 +14,7 @@ warnings.filterwarnings('ignore')
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 
+# Update model path to your local path
 model_name = 'failspy/kappa-3-phi-abliterated'
 
 # create model and load it to the specified device
@@ -30,37 +31,38 @@ tokenizer = AutoTokenizer.from_pretrained(
 )
 
 def inference(prompt, image, temperature, beam_size):
+    # Phi-3 uses a chat template
     messages = [
-        {"role": "user", "content": f
+        {"role": "user", "content": f"Can you describe this image?\n{prompt}"}
     ]
-
-
-
-
-
-
-
-    input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1], dtype=torch.long).unsqueeze(0).to(device)
+
+    # Apply chat template and add generation prompt
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to(device)
 
-
+    # Process the image
+    pixel_values = model.prepare_image(image).to(device)
 
     # Add debug prints
     print(f"Device of model: {next(model.parameters()).device}")
-    print(f"Device of
-    print(f"Device of
+    print(f"Device of inputs: {inputs.input_ids.device}")
+    print(f"Device of pixel_values: {pixel_values.device}")
 
     # generate
     with torch.cuda.amp.autocast():
         output_ids = model.generate(
-            input_ids,
-
+            inputs.input_ids,
+            pixel_values=pixel_values,
             max_new_tokens=1024,
             temperature=temperature,
             num_beams=beam_size,
             use_cache=True
         )[0]
 
-    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
+    return tokenizer.decode(output_ids[inputs.input_ids.shape[1]:], skip_special_tokens=True).strip()
 
 with gr.Blocks() as demo:
     with gr.Row():
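Independent of whatever broke the build, the new template step has a runtime pitfall: in current transformers releases, tokenizer.apply_chat_template(..., return_tensors="pt") returns a bare tensor of token ids unless return_dict=True is also passed, so the inputs.input_ids accesses above would raise AttributeError. A minimal sketch of that step with the flag added (assuming a transformers version recent enough to accept return_dict; pixel_values is the diff's own, model-specific argument):

```python
# Minimal sketch, assuming a recent transformers release.
# With the default return_dict=False, apply_chat_template(return_tensors="pt")
# returns a plain tensor, so `inputs.input_ids` would raise AttributeError.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,  # BatchEncoding with .input_ids and .attention_mask
).to(device)

output_ids = model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,  # avoids the missing-mask warning
    pixel_values=pixel_values,             # as in the diff; model-specific kwarg
    max_new_tokens=1024,
    temperature=temperature,
    num_beams=beam_size,
    use_cache=True,
)[0]
```

Separately, torch.cuda.amp.autocast() is deprecated in newer PyTorch releases in favor of torch.amp.autocast("cuda"); it still works but emits a warning.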
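Note also that prepare_image is not part of the stock transformers API; the call assumes the checkpoint's custom modeling code (presumably loaded with trust_remote_code=True) exposes that helper. For a quick local check of the handler, a hypothetical smoke test (example.jpg is a placeholder path, not part of the repo):

```python
# Hypothetical smoke test for the handler defined in app.py above.
from PIL import Image

image = Image.open("example.jpg").convert("RGB")  # placeholder test image
print(inference("What stands out in this photo?", image, temperature=0.7, beam_size=1))
```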