Spaces:

Gajendra5490
/

ApiEndPointDemo

Runtime error

Gajendra5490 committed on Mar 2, 2025

Commit

71976e8

verified ·

1 Parent(s): f018461

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,18 +1,13 @@
-import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
 # Model name
 model_name = "MONAI/Llama3-VILA-M3-8B"
-# Load tokenizer and model with trust_remote_code=True
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16,
-    device_map="auto",
-    trust_remote_code=True
-)
 def generate_response(prompt):
     inputs = tokenizer(prompt, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
@@ -25,8 +20,8 @@ iface = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt..."),
     outputs="text",
-    title="MONAI Llama3-VILA-M3-8B Chatbot",
-    description="A chatbot powered by MONAI/Llama3-VILA-M3-8B",
 )
 iface.launch()

 import torch
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from llava.model.builder import load_pretrained_model  # Import LLaVA model builder
 # Model name
 model_name = "MONAI/Llama3-VILA-M3-8B"
+# Load LLaVA model
+tokenizer, model, _ = load_pretrained_model(model_path=model_name, model_base=None, device="cuda" if torch.cuda.is_available() else "cpu")
 def generate_response(prompt):
     inputs = tokenizer(prompt, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
     fn=generate_response,
     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt..."),
     outputs="text",
+    title="LLaVA Llama3-VILA-M3-8B Chatbot",
+    description="A chatbot powered by LLaVA and Llama3-VILA-M3-8B",
 )
 iface.launch()