Spaces:

limitedonly41
/

website_classification

Sleeping

App Files Files Community

limitedonly41 committed on Jun 8, 2024

Commit

b0da584

verified ·

1 Parent(s): 410c398

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -17

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import gradio as gr
-from unsloth import FastLanguageModel
 import torch
 # Configuration
@@ -9,24 +8,39 @@ load_in_4bit = True  # Use 4-bit quantization to reduce memory usage
 peft_model_name = "limitedonly41/website_mistral7b_v02_1200_finetuned_5_big"
-# Load the model and tokenizer
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name=peft_model_name,  # YOUR MODEL YOU USED FOR TRAINING
-    max_seq_length=max_seq_length,
-    dtype=dtype,
-    load_in_4bit=load_in_4bit,
-)
-FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
-def return_prediction(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
-    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
-    ans = tokenizer.batch_decode(outputs)[0]
-    ans_pred = ans.split('### Response:')[1].split('<')[0]
-    return ans_pred
-@spaces.GPU
 def classify_website(site_text):
     prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 ### Instruction:
@@ -40,7 +54,14 @@ Categorize the website into one of the 3 categories:
 {site_text}
 ### Response:"""
-    return return_prediction(prompt)
 # Create a Gradio interface
 iface = gr.Interface(

 import gradio as gr
 import torch
 # Configuration
 peft_model_name = "limitedonly41/website_mistral7b_v02_1200_finetuned_5_big"
+# from unsloth import FastLanguageModel
+# # Load the model and tokenizer
+# model, tokenizer = FastLanguageModel.from_pretrained(
+#     model_name=peft_model_name,  # YOUR MODEL YOU USED FOR TRAINING
+#     max_seq_length=max_seq_length,
+#     dtype=dtype,
+#     load_in_4bit=load_in_4bit,
+# )
+# FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
+# def return_prediction(prompt):
+#     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+#     outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
+#     ans = tokenizer.batch_decode(outputs)[0]
+#     ans_pred = ans.split('### Response:')[1].split('<')[0]
+#     return ans_pred
+@spaces.GPU()
 def classify_website(site_text):
+    from unsloth import FastLanguageModel
+    # Load the model and tokenizer
+    model, tokenizer = FastLanguageModel.from_pretrained(
+        model_name=peft_model_name,  # YOUR MODEL YOU USED FOR TRAINING
+        max_seq_length=max_seq_length,
+        dtype=dtype,
+        load_in_4bit=load_in_4bit,
+    )
+    FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
     prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 ### Instruction:
 {site_text}
 ### Response:"""
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
+    ans = tokenizer.batch_decode(outputs)[0]
+    ans_pred = ans.split('### Response:')[1].split('<')[0]
+    return ans_pred
+    # return return_prediction(prompt)
 # Create a Gradio interface
 iface = gr.Interface(