Spaces:

limitedonly41
/

website_classification

Sleeping

App Files Community

limitedonly41 committed on Sep 20, 2024

Commit

ced26d2

verified ·

1 Parent(s): 4c0d842

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -39

app.py CHANGED Viewed

@@ -102,31 +102,31 @@ def main(urls):
 def classify_website(url):
     global model, tokenizer  # Declare model and tokenizer as global variables
-    try:
-        # Load the model and tokenizer if they are not already loaded
-        if model is None or tokenizer is None:
-            from unsloth import FastLanguageModel
-            # Load the model and tokenizer
-            model, tokenizer = FastLanguageModel.from_pretrained(
-                model_name=peft_model_name,  # Model used for training
-                max_seq_length=max_seq_length,
-                dtype=dtype,
-                load_in_4bit=load_in_4bit,
-            )
-            FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
-        urls = [url]
-        results_shop = main(urls)
-        # Convert results to DataFrame
-        df_result_train_more = pd.DataFrame(results_shop)
-        text = df_result_train_more['text'][0]
-        translated = GoogleTranslator(source='auto', target='en').translate(text[:4990])
-        prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 ### Instruction:
 Categorize the website into one of the 3 categories:
@@ -140,23 +140,23 @@ Categorize the website into one of the 3 categories:
 ### Response:"""
-        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
-        outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
-        ans = tokenizer.batch_decode(outputs)[0]
-        ans_pred = ans.split('### Response:')[1].split('<')[0]
-        if 'OTHER' in ans_pred:
-            ans_pred = 'OTHER'
-        elif 'NEWS/BLOG' in ans_pred:
-            ans_pred = 'NEWS/BLOG'
-        elif 'E-commerce' in ans_pred:
-            ans_pred = 'E-commerce'
-        return ans_pred
-    except Exception as e:
-        logging.exception(e)
-        return str(e)
 # Create a Gradio interface
 iface = gr.Interface(

 def classify_website(url):
     global model, tokenizer  # Declare model and tokenizer as global variables
+    # try:
+    # Load the model and tokenizer if they are not already loaded
+    if model is None or tokenizer is None:
+        from unsloth import FastLanguageModel
+        # Load the model and tokenizer
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name=peft_model_name,  # Model used for training
+            max_seq_length=max_seq_length,
+            dtype=dtype,
+            load_in_4bit=load_in_4bit,
+        )
+        FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
+    urls = [url]
+    results_shop = main(urls)
+    # Convert results to DataFrame
+    df_result_train_more = pd.DataFrame(results_shop)
+    text = df_result_train_more['text'][0]
+    translated = GoogleTranslator(source='auto', target='en').translate(text[:4990])
+    prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 ### Instruction:
 Categorize the website into one of the 3 categories:
 ### Response:"""
+    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
+    ans = tokenizer.batch_decode(outputs)[0]
+    ans_pred = ans.split('### Response:')[1].split('<')[0]
+    if 'OTHER' in ans_pred:
+        ans_pred = 'OTHER'
+    elif 'NEWS/BLOG' in ans_pred:
+        ans_pred = 'NEWS/BLOG'
+    elif 'E-commerce' in ans_pred:
+        ans_pred = 'E-commerce'
+    return ans_pred
+    # except Exception as e:
+    #     logging.exception(e)
+    #     return str(e)
 # Create a Gradio interface
 iface = gr.Interface(