limitedonly41 committed on
Commit
f1aa3d7
·
verified ·
1 Parent(s): 4eaaba6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -89
app.py CHANGED
@@ -8,40 +8,25 @@ from datetime import datetime, timezone
8
  import httpx
9
  from deep_translator import GoogleTranslator
10
  import torch
11
- from transformers import AutoTokenizer, AutoModelForCausalLM
12
-
13
- # Initialize model globals without unsloth
14
- model_name = "limitedonly41/mistral7b_v3_4_categories"
15
- model = None
16
- tokenizer = None
17
-
18
- def load_model():
19
- """Load model without unsloth"""
20
- global model, tokenizer
21
-
22
- print("Loading model with transformers...")
23
- try:
24
- tokenizer = AutoTokenizer.from_pretrained(model_name)
25
- model = AutoModelForCausalLM.from_pretrained(
26
- model_name,
27
- torch_dtype=torch.float16,
28
- device_map="auto",
29
- load_in_4bit=True,
30
- trust_remote_code=True
31
- )
32
-
33
- # Set pad token
34
- if tokenizer.pad_token is None:
35
- tokenizer.pad_token = tokenizer.eos_token
36
-
37
- print("Model loaded successfully")
38
- return True
39
- except Exception as e:
40
- print(f"Model loading error: {e}")
41
- return False
42
-
43
- # Try to load model at startup
44
- model_loaded = load_model()
45
 
46
  # In-memory storage (replacing Redis)
47
  task_storage = {}
@@ -88,69 +73,38 @@ def translate_text(text: str) -> str:
88
 
89
  @spaces.GPU
90
  def predict_inference(translated_text: str) -> str:
91
- """GPU-accelerated inference function using transformers"""
92
  try:
93
- global model, tokenizer
94
-
95
- if not model_loaded or model is None or tokenizer is None:
96
- return 'MODEL_ERROR'
97
-
98
  if len(translated_text) < 150:
99
  return 'Short'
100
 
101
  prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
102
 
103
  ### Instruction:
104
- Categorize the website into one of the 4 categories:\n\n1) OTHER\n2) NEWS/BLOG\n3) E-commerce\n4) COMPANIES
105
 
106
  ### Input:
107
  {translated_text}
108
 
109
  ### Response:"""
110
 
111
- # Tokenize input
112
- inputs = tokenizer(
113
- prompt,
114
- return_tensors="pt",
115
- max_length=2048,
116
- truncation=True,
117
- padding=True
118
- )
119
 
120
- # Move to GPU
121
- if torch.cuda.is_available():
122
- inputs = {k: v.to('cuda') for k, v in inputs.items()}
123
-
124
- # Generate response
125
- with torch.no_grad():
126
- outputs = model.generate(
127
- **inputs,
128
- max_new_tokens=16,
129
- temperature=0.1,
130
- do_sample=False,
131
- pad_token_id=tokenizer.eos_token_id
132
- )
133
-
134
- # Decode response
135
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
136
-
137
- # Extract prediction
138
- if '### Response:' in generated_text:
139
- ans_pred = generated_text.split('### Response:')[1].strip()
140
- else:
141
- ans_pred = generated_text.split(prompt)[1].strip() if prompt in generated_text else generated_text
142
 
143
- # Clean and categorize
144
- ans_pred = ans_pred.split('<')[0].strip()
145
 
146
- if 'OTHER' in ans_pred.upper():
147
  return 'OTHER'
148
- elif 'NEWS/BLOG' in ans_pred.upper() or 'NEWS' in ans_pred.upper() or 'BLOG' in ans_pred.upper():
149
  return 'NEWS/BLOG'
150
- elif 'E-COMMERCE' in ans_pred.upper() or 'ECOMMERCE' in ans_pred.upper():
151
  return 'E-commerce'
152
  else:
153
- return 'OTHER' # Default fallback
154
 
155
  except Exception as e:
156
  print(f"Inference error: {e}")
@@ -159,10 +113,10 @@ Categorize the website into one of the 4 categories:\n\n1) OTHER\n2) NEWS/BLOG\n
159
  async def scrape_single_url(session: httpx.AsyncClient, url: str) -> Dict:
160
  """Scrape a single URL"""
161
  try:
162
- response = await session.get(url, timeout=30.0, follow_redirects=True)
163
  if response.status_code == 200:
164
- # Simple text extraction
165
- text_content = response.text[:5000]
166
  return {
167
  "url": url,
168
  "text": text_content,
@@ -222,9 +176,6 @@ async def process_urls_batch(urls: List[str], progress_callback=None) -> Dict[st
222
 
223
  def process_url_list(url_text: str, progress=gr.Progress()) -> str:
224
  """Main processing function for Gradio interface"""
225
- if not model_loaded:
226
- return "❌ Model loading failed. Please check the logs and try again."
227
-
228
  if not url_text.strip():
229
  return "Please provide URLs to process."
230
 
@@ -234,8 +185,8 @@ def process_url_list(url_text: str, progress=gr.Progress()) -> str:
234
  if not urls:
235
  return "No valid URLs found."
236
 
237
- if len(urls) > 20: # Reduced limit for stability
238
- return f"Too many URLs ({len(urls)}). Please limit to 20 URLs."
239
 
240
  try:
241
  # Process URLs
@@ -262,12 +213,9 @@ def process_url_list(url_text: str, progress=gr.Progress()) -> str:
262
 
263
  # Create Gradio interface
264
  def create_interface():
265
- status_msg = "✅ Model loaded successfully" if model_loaded else "❌ Model loading failed"
266
-
267
  with gr.Blocks(title="Website Category Classifier") as interface:
268
  gr.HTML("<h1>🔍 Website Category Classifier</h1>")
269
- gr.HTML(f"<p>Classify websites into categories: OTHER, NEWS/BLOG, or E-commerce</p>")
270
- gr.HTML(f"<p><strong>Status:</strong> {status_msg}</p>")
271
 
272
  with gr.Row():
273
  with gr.Column():
 
8
  import httpx
9
  from deep_translator import GoogleTranslator
10
  import torch
11
+ from torch.amp import autocast
12
+ from unsloth import FastLanguageModel
13
+
14
+ # Initialize model globally (outside GPU decorator)
15
+ max_seq_length = 2048
16
+ dtype = None
17
+ load_in_4bit = True
18
+ peft_model_name = "limitedonly41/website_mistral7b_v02"
19
+
20
+ # Load model once at startup
21
+ print("Loading model...")
22
+ model, tokenizer = FastLanguageModel.from_pretrained(
23
+ model_name=peft_model_name,
24
+ max_seq_length=max_seq_length,
25
+ dtype=dtype,
26
+ load_in_4bit=load_in_4bit,
27
+ )
28
+ FastLanguageModel.for_inference(model)
29
+ print("Model loaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  # In-memory storage (replacing Redis)
32
  task_storage = {}
 
73
 
74
  @spaces.GPU
75
  def predict_inference(translated_text: str) -> str:
76
+ """GPU-accelerated inference function"""
77
  try:
 
 
 
 
 
78
  if len(translated_text) < 150:
79
  return 'Short'
80
 
81
  prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
82
 
83
  ### Instruction:
84
+ Categorize the website into one of the 3 categories:\n\n1) OTHER \n2) NEWS/BLOG\n3) E-commerce
85
 
86
  ### Input:
87
  {translated_text}
88
 
89
  ### Response:"""
90
 
91
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
 
 
 
 
 
92
 
93
+ with autocast(device_type='cuda'):
94
+ inputs = tokenizer(prompt, return_tensors="pt").to(device)
95
+ outputs = model.generate(**inputs, max_new_tokens=16, use_cache=True)
96
+ ans = tokenizer.batch_decode(outputs)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ ans_pred = ans.split('### Response:')[1].split('<')[0].strip()
 
99
 
100
+ if 'OTHER' in ans_pred:
101
  return 'OTHER'
102
+ elif 'NEWS/BLOG' in ans_pred:
103
  return 'NEWS/BLOG'
104
+ elif 'E-commerce' in ans_pred:
105
  return 'E-commerce'
106
  else:
107
+ return 'ERROR'
108
 
109
  except Exception as e:
110
  print(f"Inference error: {e}")
 
113
  async def scrape_single_url(session: httpx.AsyncClient, url: str) -> Dict:
114
  """Scrape a single URL"""
115
  try:
116
+ response = await session.get(url, timeout=30.0)
117
  if response.status_code == 200:
118
+ # Simple text extraction (you can enhance this)
119
+ text_content = response.text[:5000] # Limit content
120
  return {
121
  "url": url,
122
  "text": text_content,
 
176
 
177
  def process_url_list(url_text: str, progress=gr.Progress()) -> str:
178
  """Main processing function for Gradio interface"""
 
 
 
179
  if not url_text.strip():
180
  return "Please provide URLs to process."
181
 
 
185
  if not urls:
186
  return "No valid URLs found."
187
 
188
+ if len(urls) > 50: # Limit for demo
189
+ return f"Too many URLs ({len(urls)}). Please limit to 50 URLs."
190
 
191
  try:
192
  # Process URLs
 
213
 
214
  # Create Gradio interface
215
  def create_interface():
 
 
216
  with gr.Blocks(title="Website Category Classifier") as interface:
217
  gr.HTML("<h1>🔍 Website Category Classifier</h1>")
218
+ gr.HTML("<p>Classify websites into categories: OTHER, NEWS/BLOG, or E-commerce</p>")
 
219
 
220
  with gr.Row():
221
  with gr.Column():