hamxaameer committed
Commit 4096b3f · verified · 1 Parent(s): 5a8f7f6

Update app.py

Files changed (1)
  1. app.py +25 -81
app.py CHANGED
@@ -44,9 +44,7 @@ def initialize_llm():
     logger.info("🔄 Initializing FREE local language model...")
 
     BACKUP_MODELS = [
-        "HuggingFaceH4/zephyr-7b-beta",          # Primary - 7B, excellent quality
-        "mistralai/Mistral-7B-Instruct-v0.2",    # Backup - 7B, very good
-        "google/flan-t5-xl",                     # Fallback - 3B, reliable
+        "microsoft/phi-2",                       # 2.7B - Best quality that fits in 16GB
     ]
 
     for model_name in BACKUP_MODELS:
@@ -54,24 +52,15 @@ def initialize_llm():
             logger.info(f" Trying {model_name}...")
             device = 0 if torch.cuda.is_available() else -1
 
-            # Determine task and model type
-            if "t5" in model_name.lower():
-                task = "text2text-generation"
-                model_type = "t5"
-            elif "zephyr" in model_name.lower():
-                task = "text-generation"
-                model_type = "zephyr"
-            elif "mistral" in model_name.lower():
-                task = "text-generation"
-                model_type = "mistral"
-            else:
-                task = "text-generation"
-                model_type = "instruct"
+            # Phi-2 configuration
+            task = "text-generation"
+            model_type = "phi"
 
-            # Model-specific kwargs for optimization
+            # Optimized for memory efficiency
             model_kwargs = {
                 "low_cpu_mem_usage": True,
-                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32
+                "torch_dtype": torch.float32,    # Use float32 for CPU
+                "trust_remote_code": True        # Required for Phi-2
             }
 
             llm_client = pipeline(
@@ -374,78 +363,33 @@ def generate_llm_answer(
     top_p = 0.97
     repetition_penalty = 1.25
 
-    # Create prompt based on model type
-    model_type = CONFIG.get("model_type", "instruct")
-
-    if model_type == "t5":
-        # T5 needs simple format
-        user_prompt = f"Answer this fashion question using the context:\n\nQuestion: {query}\n\nContext: {context_text[:1000]}\n\nAnswer:"
-    elif model_type == "zephyr":
-        # Zephyr chat format
-        user_prompt = f"""<|system|>
-You are a professional fashion advisor. Use the provided fashion knowledge to give specific, detailed advice.</|system|>
-<|user|>
-Fashion Knowledge:
-{context_text[:1500]}
-
-Question: {query}
-
-Provide a detailed, specific answer (150-250 words) based on the fashion knowledge above.</|user|>
-<|assistant|>"""
-    elif model_type == "mistral":
-        # Mistral instruct format
-        user_prompt = f"""[INST] You are a fashion expert. Use the following fashion knowledge to answer the question with specific, practical advice.
+    # Create prompt for Phi-2
+    model_type = CONFIG.get("model_type", "phi")
+
+    # Phi-2 optimized format (simple and effective)
+    user_prompt = f"""Instruct: You are a professional fashion advisor. Use the fashion knowledge below to answer the question with specific, detailed advice.
 
 Fashion Knowledge:
-{context_text[:1500]}
+{context_text[:1200]}
 
 Question: {query}
 
-Provide a detailed answer (150-250 words). [/INST]"""
-    else:
-        # Generic instruct format
-        user_prompt = f"""[INST] Question: {query}
-
-Fashion Knowledge:
-{context_text}
-
-Answer the question using the knowledge above. Be specific and helpful (150-250 words). [/INST]"""
+Output: """
 
     try:
         logger.info(f" → Calling {CONFIG['llm_model']} (temp={temperature}, tokens={max_tokens})...")
 
-        # Call pipeline with model-specific parameters
-        if model_type == "t5":
-            # T5 uses max_length
-            output = llm_client(
-                user_prompt,
-                max_length=200,
-                temperature=0.7,
-                top_p=0.9,
-                do_sample=True
-            )
-        elif model_type in ["zephyr", "mistral"]:
-            # Modern instruct models - optimized for quality
-            output = llm_client(
-                user_prompt,
-                max_new_tokens=250,          # Good length for detailed answers
-                temperature=0.7,             # Balanced creativity
-                top_p=0.9,
-                repetition_penalty=1.1,
-                do_sample=True,
-                return_full_text=False
-            )
-        else:
-            # Other models
-            output = llm_client(
-                user_prompt,
-                max_new_tokens=max_tokens,
-                temperature=temperature,
-                top_p=top_p,
-                repetition_penalty=repetition_penalty,
-                do_sample=True,
-                return_full_text=False
-            )
+        # Phi-2 optimized parameters
+        output = llm_client(
+            user_prompt,
+            max_new_tokens=min(max_tokens, 250),   # Cap for speed
+            temperature=0.7,                       # Balanced
+            top_p=0.9,
+            repetition_penalty=1.15,
+            do_sample=True,
+            return_full_text=False,
+            pad_token_id=50256                     # Phi-2 pad token
+        )
 
         # Extract generated text
         response = output[0]['generated_text'].strip()
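
For context, here is a minimal, self-contained sketch of what the model setup amounts to after this commit: microsoft/phi-2 loaded through a transformers text-generation pipeline with the same kwargs shown in the diff. It is an illustration only, not the full initialize_llm() from app.py (the fallback loop, logging, and CONFIG handling are omitted).

```python
# Hedged sketch of the post-commit setup; assumes transformers and torch are installed.
import torch
from transformers import pipeline

device = 0 if torch.cuda.is_available() else -1   # GPU if available, else CPU

llm_client = pipeline(
    "text-generation",
    model="microsoft/phi-2",            # single 2.7B model replaces the 7B fallback list
    device=device,
    model_kwargs={
        "low_cpu_mem_usage": True,
        "torch_dtype": torch.float32,   # float32 for CPU, per the commit
        "trust_remote_code": True,      # the commit marks this as required for Phi-2
    },
)
```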
 
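And a matching sketch of the new Phi-2 prompt format and generation call from generate_llm_answer, reusing llm_client from the snippet above; the sample context_text and query are placeholders, not values from app.py.

```python
# Illustrative inputs; in app.py these come from retrieval and the user's question.
context_text = "Linen, seersucker and cotton poplin breathe well in humid weather."
query = "What fabrics work best for a summer wedding outfit?"

# Phi-2's simple "Instruct: ... Output:" prompt used by the commit.
user_prompt = f"""Instruct: You are a professional fashion advisor. Use the fashion knowledge below to answer the question with specific, detailed advice.

Fashion Knowledge:
{context_text[:1200]}

Question: {query}

Output: """

output = llm_client(
    user_prompt,
    max_new_tokens=250,          # capped for speed, as in the diff
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.15,
    do_sample=True,
    return_full_text=False,      # return only the completion, not the prompt
    pad_token_id=50256,          # pad token id the commit uses for Phi-2
)
print(output[0]["generated_text"].strip())
```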