Spaces:

rootxhacker
/

CodeAstra-7B-demo

Runtime error

App Files Community

rootxhacker committed on Jul 3, 2024

Commit

112e6b8

verified ·

1 Parent(s): bbcea92

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -19

app.py CHANGED Viewed

@@ -4,43 +4,30 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 import spaces
-# Ensure CUDA is available
-assert torch.cuda.is_available(), "CUDA is not available. Please check your GPU setup."
-# Set the device
-device = torch.device("cuda")
-torch.cuda.set_device(0)  # Use the first GPU if multiple are available
 # Load the model and tokenizer
 peft_model_id = "rootxhacker/CodeAstra-7B"
 config = PeftConfig.from_pretrained(peft_model_id)
-# Load the model on GPU
 model = AutoModelForCausalLM.from_pretrained(
     config.base_model_name_or_path,
     return_dict=True,
     load_in_4bit=True,
-    torch_dtype=torch.float16,
-    device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 # Load the Lora model
 model = PeftModel.from_pretrained(model, peft_model_id)
-model.to(device)
-# Ensure all model parameters are on CUDA
-for param in model.parameters():
-    param.data = param.data.to(device)
 @spaces.GPU(duration=200)
 def get_completion(query, model, tokenizer):
     try:
-        inputs = tokenizer(query, return_tensors="pt").to(device)
         with torch.no_grad():
             outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
-        return tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
     except Exception as e:
         return f"An error occurred: {str(e)}"
@@ -59,5 +46,5 @@ iface = gr.Interface(
     description="This tool analyzes code for potential security flaws and provides guidance on secure coding practices."
 )
-# Launch the Gradio app with a public link
-iface.launch()

 import gradio as gr
 import spaces
 # Load the model and tokenizer
 peft_model_id = "rootxhacker/CodeAstra-7B"
 config = PeftConfig.from_pretrained(peft_model_id)
+# Load the model without explicit device mapping
 model = AutoModelForCausalLM.from_pretrained(
     config.base_model_name_or_path,
     return_dict=True,
     load_in_4bit=True,
+    device_map=None  # Let the Spaces environment handle device mapping
 )
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 # Load the Lora model
 model = PeftModel.from_pretrained(model, peft_model_id)
 @spaces.GPU(duration=200)
 def get_completion(query, model, tokenizer):
     try:
+        inputs = tokenizer(query, return_tensors="pt")
         with torch.no_grad():
             outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
+        return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
         return f"An error occurred: {str(e)}"
     description="This tool analyzes code for potential security flaws and provides guidance on secure coding practices."
 )
+# Launch the Gradio app
+iface.launch()