Ram-N committed on
Commit
9899531
·
verified ·
1 Parent(s): 2425e2a

stubbed responses testing

Browse files
Files changed (2) hide show
  1. app.py +46 -93
  2. requirements.txt +1 -8
app.py CHANGED
@@ -3,108 +3,64 @@ import os
3
  import json
4
  import logging
5
  import time
6
- from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
8
  # Set up logging
9
  logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
10
  logger = logging.getLogger(__name__)
11
 
12
- # Use a much smaller model for better performance on CPU
13
- # Options: "microsoft/phi-2" (2.7B), "google/gemma-2b-it" (2B), "TinyLlama/TinyLlama-1.1B-Chat-v1.0" (1.1B)
14
- MODEL_ID = "microsoft/phi-2" # Small but powerful 2.7B model
15
- LOADED_MODEL = False
 
 
 
 
 
 
 
 
 
16
 
17
- # Use cached responses for faster testing
18
- response_cache = {}
19
-
20
- # Try to load the model with optimizations for CPU
21
- try:
22
- logger.info(f"Loading model: {MODEL_ID}...")
23
- start_time = time.time()
24
 
25
- # Load tokenizer first
26
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
27
- logger.info(f"Tokenizer loaded in {time.time() - start_time:.2f} seconds")
28
 
29
- # Load model with CPU optimizations
30
- model = AutoModelForCausalLM.from_pretrained(
31
- MODEL_ID,
32
- low_cpu_mem_usage=True, # Optimize for CPU
33
- torch_dtype="auto", # Use best available precision
34
- device_map="auto", # Let the library decide where to put the model
35
- )
36
 
37
- logger.info(f"Model loaded in {time.time() - start_time:.2f} seconds")
38
- LOADED_MODEL = True
 
 
 
 
 
 
39
 
40
- except Exception as e:
41
- logger.error(f"Failed to load model: {e}")
42
- tokenizer = None
43
- model = None
44
-
45
- def format_prompt(text):
46
- """Format a prompt for the model with appropriate wrapping."""
47
- if MODEL_ID == "microsoft/phi-2":
48
- # Phi-2 format
49
- return f"<|system|>\nYou are Nova, a supportive AI chatbot designed to help users with emotional regulation. Your responses should be empathetic, supportive, and focused on wellbeing. Keep responses concise and helpful.\n<|user|>\n{text}\n<|assistant|>"
50
  else:
51
- # Generic format for other models
52
- return f"System: You are Nova, a supportive AI chatbot designed to help users with emotional regulation. Your responses should be empathetic, supportive, and focused on wellbeing. Keep responses concise and helpful.\nUser: {text}\nAssistant:"
53
 
54
- def generate_response(prompt, max_new_tokens=100, temperature=0.7):
55
- """Generate a response from the model or use a mock response if model failed to load."""
56
- # Check cache first
57
- if prompt in response_cache:
58
- logger.info(f"Using cached response for: {prompt[:30]}...")
59
- return response_cache[prompt]
60
 
61
- # If model failed to load, return mock response
62
- if not LOADED_MODEL or model is None:
63
- mock_response = f"I understand you said: '{prompt}'. As Nova, I'm here to support you. How can I help with your emotional wellbeing today?"
64
- response_cache[prompt] = mock_response
65
- return mock_response
66
 
67
- try:
68
- start_time = time.time()
69
- logger.info(f"Generating response for: {prompt[:30]}...")
70
-
71
- formatted_prompt = format_prompt(prompt)
72
- inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
73
-
74
- # Generate with more conservative settings for speed
75
- output = model.generate(
76
- inputs.input_ids,
77
- max_new_tokens=max_new_tokens, # Shorter responses
78
- temperature=temperature,
79
- do_sample=True,
80
- top_p=0.9,
81
- pad_token_id=tokenizer.eos_token_id, # Prevent padding issues
82
- )
83
-
84
- # Decode and clean up the output
85
- response = tokenizer.decode(output[0], skip_special_tokens=True)
86
-
87
- # Extract just the assistant's reply
88
- if MODEL_ID == "microsoft/phi-2":
89
- parts = response.split("<|assistant|>")
90
- if len(parts) > 1:
91
- response = parts[1].strip()
92
- else:
93
- parts = response.split("Assistant:")
94
- if len(parts) > 1:
95
- response = parts[1].strip()
96
-
97
- logger.info(f"Response generated in {time.time() - start_time:.2f} seconds")
98
-
99
- # Cache for future use
100
- response_cache[prompt] = response
101
- return response
102
-
103
- except Exception as e:
104
- logger.error(f"Error generating response: {e}")
105
- error_response = f"I'm having trouble processing that request. How can I support you today?"
106
- response_cache[prompt] = error_response
107
- return error_response
108
 
109
  # API endpoint that mimics your current backend API
110
  def chat_api(message):
@@ -125,7 +81,7 @@ def chat_api(message):
125
  "timestamp": None # Will be filled by your backend
126
  },
127
  "meta": {
128
- "model_used": MODEL_ID,
129
  "debug": None
130
  }
131
  }
@@ -134,10 +90,7 @@ def chat_api(message):
134
 
135
  # Create Gradio Interface
136
  with gr.Blocks() as demo:
137
- gr.Markdown(f"# NOVA LLM Backend\nRunning model: {MODEL_ID}")
138
-
139
- if not LOADED_MODEL:
140
- gr.Markdown("⚠️ **WARNING**: Model failed to load. Using mock responses.")
141
 
142
  with gr.Tab("Chat"):
143
  with gr.Row():
 
3
  import json
4
  import logging
5
  import time
6
+ import random
7
 
8
  # Set up logging
9
  logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
10
  logger = logging.getLogger(__name__)
11
 
12
# Canned fallback replies for the rule-based stub backend.
# generate_response() serves one of these (via random.choice) whenever no
# keyword rule in select_mock_response() matches, so this list must stay
# non-empty. All entries are generic, supportive prompts in Nova's voice.
MOCK_RESPONSES = [
    "I understand how you're feeling. Would you like to talk more about what's been going on?",
    "That sounds challenging. I'm here to support you through this. What has been helping you cope so far?",
    "Thank you for sharing that with me. How long have you been feeling this way?",
    "I hear you. It's important to acknowledge these emotions. Would it help to explore some calming techniques?",
    "I appreciate you opening up. Is there anything specific that triggers these feelings for you?",
    "Your feelings are valid. What kind of support would be most helpful for you right now?",
    "I'm here to listen without judgment. Would you like to talk more about this or would you prefer some suggestions?",
    "It takes courage to express how you're feeling. How can I best support you today?",
    "That's really challenging to deal with. Have you been able to talk to anyone else about this?",
    "I'm glad you reached out. Sometimes just putting feelings into words can help us process them better."
]
25
 
26
def select_mock_response(prompt):
    """Pick a mock reply for *prompt* via simple keyword rules.

    Emotion keywords are matched as substrings on purpose, so stems like
    "depress", "confus", and "excite" also catch "depressed", "confusing",
    "excited", etc.  Greetings, however, are matched as whole words: a
    plain substring test for "hi" would fire inside ordinary words such as
    "this" or "nothing" and mis-route the message to the greeting reply.

    Returns a keyword-specific reply, or a random entry from
    MOCK_RESPONSES when nothing matches.
    """
    # Lowercase once instead of re-lowering the prompt in every branch.
    text = prompt.lower()

    if any(word in text for word in ["sad", "depress", "down", "unhappy"]):
        return "I understand you're feeling down. Remember that emotions come and go, and you won't feel this way forever. What small activity might bring you a moment of joy today?"

    elif any(word in text for word in ["anxious", "worry", "stress", "nervous"]):
        return "I hear that anxiety is affecting you. Taking slow, deep breaths can help calm your nervous system. Would you like to try a quick breathing exercise together?"

    elif any(word in text for word in ["angry", "mad", "frustrated", "upset"]):
        return "It sounds like you're feeling frustrated. That's completely valid. Sometimes it helps to express these feelings in a safe way. What usually helps you process anger?"

    elif any(word in text for word in ["happy", "joy", "excite", "great"]):
        return "I'm glad to hear you're feeling positive! Moments of joy are worth celebrating. What contributed to this good feeling?"

    elif any(word in text for word in ["confus", "uncertain", "lost"]):
        return "Feeling uncertain can be uncomfortable. Breaking things down into smaller parts sometimes helps provide clarity. What specific aspect feels most confusing right now?"

    # Whole-word greeting check (strip common trailing punctuation) so that
    # words merely *containing* "hi" don't trigger the greeting.
    elif any(w.strip(".,!?") in ("hello", "hi") for w in text.split()):
        return "Hello! I'm Nova, here to support your emotional wellbeing. How are you feeling today?"

    else:
        # No keyword matched: fall back to a random canned reply.
        return random.choice(MOCK_RESPONSES)
50
 
51
def generate_response(prompt):
    """Produce a reply for *prompt* using the keyword-rule mock system.

    Logs the request and the elapsed time, and adds a short artificial
    delay so the exchange feels like a real model call.
    """
    logger.info(f"Generating response for: {prompt[:30]}...")
    t0 = time.time()

    # Delegate the actual reply selection to the keyword-rule helper.
    reply = select_mock_response(prompt)

    # Brief pause for realism (kept short so the UI stays responsive).
    time.sleep(0.5)

    elapsed = time.time() - t0
    logger.info(f"Response generated in {elapsed:.2f} seconds")
    return reply
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # API endpoint that mimics your current backend API
66
  def chat_api(message):
 
81
  "timestamp": None # Will be filled by your backend
82
  },
83
  "meta": {
84
+ "model_used": "nova-rule-based",
85
  "debug": None
86
  }
87
  }
 
90
 
91
  # Create Gradio Interface
92
  with gr.Blocks() as demo:
93
+ gr.Markdown(f"# NOVA Rule-Based Backend\n**Note:** Using reliable rule-based responses for faster performance")
 
 
 
94
 
95
  with gr.Tab("Chat"):
96
  with gr.Row():
requirements.txt CHANGED
@@ -1,8 +1 @@
1
- gradio>=3.50.0
2
- transformers>=4.35.0
3
- torch>=2.0.0
4
- sentencepiece
5
- accelerate
6
- bitsandbytes>=0.39.0
7
- safetensors
8
- einops
 
1
+ gradio>=3.50.0