Ram-N committed on
Commit
9899531
·
verified ·
1 Parent(s): 2425e2a

stubbed responses testing

Browse files
Files changed (2) hide show
  1. app.py +46 -93
  2. requirements.txt +1 -8
app.py CHANGED
@@ -3,108 +3,64 @@ import os
3
  import json
4
  import logging
5
  import time
6
- from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
8
  # Set up logging
9
  logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
10
  logger = logging.getLogger(__name__)
11
 
12
- # Use a much smaller model for better performance on CPU
13
- # Options: "microsoft/phi-2" (2.7B), "google/gemma-2b-it" (2B), "TinyLlama/TinyLlama-1.1B-Chat-v1.0" (1.1B)
14
- MODEL_ID = "microsoft/phi-2" # Small but powerful 2.7B model
15
- LOADED_MODEL = False
 
 
 
 
 
 
 
 
 
16
 
17
- # Use cached responses for faster testing
18
- response_cache = {}
19
-
20
- # Try to load the model with optimizations for CPU
21
- try:
22
- logger.info(f"Loading model: {MODEL_ID}...")
23
- start_time = time.time()
24
 
25
- # Load tokenizer first
26
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
27
- logger.info(f"Tokenizer loaded in {time.time() - start_time:.2f} seconds")
28
 
29
- # Load model with CPU optimizations
30
- model = AutoModelForCausalLM.from_pretrained(
31
- MODEL_ID,
32
- low_cpu_mem_usage=True, # Optimize for CPU
33
- torch_dtype="auto", # Use best available precision
34
- device_map="auto", # Let the library decide where to put the model
35
- )
36
 
37
- logger.info(f"Model loaded in {time.time() - start_time:.2f} seconds")
38
- LOADED_MODEL = True
 
 
 
 
 
 
39
 
40
- except Exception as e:
41
- logger.error(f"Failed to load model: {e}")
42
- tokenizer = None
43
- model = None
44
-
45
- def format_prompt(text):
46
- """Format a prompt for the model with appropriate wrapping."""
47
- if MODEL_ID == "microsoft/phi-2":
48
- # Phi-2 format
49
- return f"<|system|>\nYou are Nova, a supportive AI chatbot designed to help users with emotional regulation. Your responses should be empathetic, supportive, and focused on wellbeing. Keep responses concise and helpful.\n<|user|>\n{text}\n<|assistant|>"
50
  else:
51
- # Generic format for other models
52
- return f"System: You are Nova, a supportive AI chatbot designed to help users with emotional regulation. Your responses should be empathetic, supportive, and focused on wellbeing. Keep responses concise and helpful.\nUser: {text}\nAssistant:"
53
 
54
- def generate_response(prompt, max_new_tokens=100, temperature=0.7):
55
- """Generate a response from the model or use a mock response if model failed to load."""
56
- # Check cache first
57
- if prompt in response_cache:
58
- logger.info(f"Using cached response for: {prompt[:30]}...")
59
- return response_cache[prompt]
60
 
61
- # If model failed to load, return mock response
62
- if not LOADED_MODEL or model is None:
63
- mock_response = f"I understand you said: '{prompt}'. As Nova, I'm here to support you. How can I help with your emotional wellbeing today?"
64
- response_cache[prompt] = mock_response
65
- return mock_response
66
 
67
- try:
68
- start_time = time.time()
69
- logger.info(f"Generating response for: {prompt[:30]}...")
70
-
71
- formatted_prompt = format_prompt(prompt)
72
- inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
73
-
74
- # Generate with more conservative settings for speed
75
- output = model.generate(
76
- inputs.input_ids,
77
- max_new_tokens=max_new_tokens, # Shorter responses
78
- temperature=temperature,
79
- do_sample=True,
80
- top_p=0.9,
81
- pad_token_id=tokenizer.eos_token_id, # Prevent padding issues
82
- )
83
-
84
- # Decode and clean up the output
85
- response = tokenizer.decode(output[0], skip_special_tokens=True)
86
-
87
- # Extract just the assistant's reply
88
- if MODEL_ID == "microsoft/phi-2":
89
- parts = response.split("<|assistant|>")
90
- if len(parts) > 1:
91
- response = parts[1].strip()
92
- else:
93
- parts = response.split("Assistant:")
94
- if len(parts) > 1:
95
- response = parts[1].strip()
96
-
97
- logger.info(f"Response generated in {time.time() - start_time:.2f} seconds")
98
-
99
- # Cache for future use
100
- response_cache[prompt] = response
101
- return response
102
-
103
- except Exception as e:
104
- logger.error(f"Error generating response: {e}")
105
- error_response = f"I'm having trouble processing that request. How can I support you today?"
106
- response_cache[prompt] = error_response
107
- return error_response
108
 
109
  # API endpoint that mimics your current backend API
110
  def chat_api(message):
@@ -125,7 +81,7 @@ def chat_api(message):
125
  "timestamp": None # Will be filled by your backend
126
  },
127
  "meta": {
128
- "model_used": MODEL_ID,
129
  "debug": None
130
  }
131
  }
@@ -134,10 +90,7 @@ def chat_api(message):
134
 
135
  # Create Gradio Interface
136
  with gr.Blocks() as demo:
137
- gr.Markdown(f"# NOVA LLM Backend\nRunning model: {MODEL_ID}")
138
-
139
- if not LOADED_MODEL:
140
- gr.Markdown("⚠️ **WARNING**: Model failed to load. Using mock responses.")
141
 
142
  with gr.Tab("Chat"):
143
  with gr.Row():
 
3
  import json
4
  import logging
5
  import time
6
+ import random
7
 
8
  # Set up logging
9
  logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
10
  logger = logging.getLogger(__name__)
11
 
12
# Canned fallback replies for the rule-based stub backend.
# generate_response() serves one of these (via random.choice) whenever no
# keyword rule in select_mock_response() matches, so this list must stay
# non-empty. All entries are generic, supportive prompts in Nova's voice.
MOCK_RESPONSES = [
    "I understand how you're feeling. Would you like to talk more about what's been going on?",
    "That sounds challenging. I'm here to support you through this. What has been helping you cope so far?",
    "Thank you for sharing that with me. How long have you been feeling this way?",
    "I hear you. It's important to acknowledge these emotions. Would it help to explore some calming techniques?",
    "I appreciate you opening up. Is there anything specific that triggers these feelings for you?",
    "Your feelings are valid. What kind of support would be most helpful for you right now?",
    "I'm here to listen without judgment. Would you like to talk more about this or would you prefer some suggestions?",
    "It takes courage to express how you're feeling. How can I best support you today?",
    "That's really challenging to deal with. Have you been able to talk to anyone else about this?",
    "I'm glad you reached out. Sometimes just putting feelings into words can help us process them better."
]
25
 
26
def select_mock_response(prompt):
    """Pick a mock reply for *prompt* via simple keyword rules.

    Emotion keywords are matched as substrings on purpose, so stems like
    "depress", "confus", and "excite" also catch "depressed", "confusing",
    "excited", etc.  Greetings, however, are matched as whole words: a
    plain substring test for "hi" would fire inside ordinary words such as
    "this" or "nothing" and mis-route the message to the greeting reply.

    Returns a keyword-specific reply, or a random entry from
    MOCK_RESPONSES when nothing matches.
    """
    # Lowercase once instead of re-lowering the prompt in every branch.
    text = prompt.lower()

    if any(word in text for word in ["sad", "depress", "down", "unhappy"]):
        return "I understand you're feeling down. Remember that emotions come and go, and you won't feel this way forever. What small activity might bring you a moment of joy today?"

    elif any(word in text for word in ["anxious", "worry", "stress", "nervous"]):
        return "I hear that anxiety is affecting you. Taking slow, deep breaths can help calm your nervous system. Would you like to try a quick breathing exercise together?"

    elif any(word in text for word in ["angry", "mad", "frustrated", "upset"]):
        return "It sounds like you're feeling frustrated. That's completely valid. Sometimes it helps to express these feelings in a safe way. What usually helps you process anger?"

    elif any(word in text for word in ["happy", "joy", "excite", "great"]):
        return "I'm glad to hear you're feeling positive! Moments of joy are worth celebrating. What contributed to this good feeling?"

    elif any(word in text for word in ["confus", "uncertain", "lost"]):
        return "Feeling uncertain can be uncomfortable. Breaking things down into smaller parts sometimes helps provide clarity. What specific aspect feels most confusing right now?"

    # Whole-word greeting check (strip common trailing punctuation) so that
    # words merely *containing* "hi" don't trigger the greeting.
    elif any(w.strip(".,!?") in ("hello", "hi") for w in text.split()):
        return "Hello! I'm Nova, here to support your emotional wellbeing. How are you feeling today?"

    else:
        # No keyword matched: fall back to a random canned reply.
        return random.choice(MOCK_RESPONSES)
50
 
51
def generate_response(prompt):
    """Produce a reply for *prompt* using the keyword-rule mock system.

    Logs the request and the elapsed time, and adds a short artificial
    delay so the exchange feels like a real model call.
    """
    logger.info(f"Generating response for: {prompt[:30]}...")
    t0 = time.time()

    # Delegate the actual reply selection to the keyword-rule helper.
    reply = select_mock_response(prompt)

    # Brief pause for realism (kept short so the UI stays responsive).
    time.sleep(0.5)

    elapsed = time.time() - t0
    logger.info(f"Response generated in {elapsed:.2f} seconds")
    return reply
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # API endpoint that mimics your current backend API
66
  def chat_api(message):
 
81
  "timestamp": None # Will be filled by your backend
82
  },
83
  "meta": {
84
+ "model_used": "nova-rule-based",
85
  "debug": None
86
  }
87
  }
 
90
 
91
  # Create Gradio Interface
92
  with gr.Blocks() as demo:
93
+ gr.Markdown(f"# NOVA Rule-Based Backend\n**Note:** Using reliable rule-based responses for faster performance")
 
 
 
94
 
95
  with gr.Tab("Chat"):
96
  with gr.Row():
requirements.txt CHANGED
@@ -1,8 +1 @@
1
- gradio>=3.50.0
2
- transformers>=4.35.0
3
- torch>=2.0.0
4
- sentencepiece
5
- accelerate
6
- bitsandbytes>=0.39.0
7
- safetensors
8
- einops
 
1
+ gradio>=3.50.0