navidfalah committed on
Commit
e0b652f
·
verified ·
1 Parent(s): 608b95d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -253
app.py CHANGED
@@ -1,315 +1,149 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
- from peft import PeftModel
5
  import os
6
- from typing import Tuple, Optional
7
 
8
- # Configuration
9
  class Config:
10
- MODEL_PATH = "navidfalah/3ai" # Your HF model repo
11
- BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1" # Mistral base model
12
- ADAPTER_PATH = "./model" # Local adapter path if needed
13
- MAX_NEW_TOKENS = 1000 # Reduced for faster response
14
  TEMPERATURE = 0.7
15
  TOP_P = 0.9
16
- MAX_INPUT_LENGTH = 512 # Reduced for faster processing
17
 
18
- # Global variables for model and tokenizer
19
  model = None
20
  tokenizer = None
21
 
22
- def test_model():
23
- """Simple test function to check if model is working."""
24
- try:
25
- model, tokenizer = load_model()
26
- if model and tokenizer:
27
- test_input = "Test: Rate my satisfaction with work at 5/10"
28
- inputs = tokenizer(test_input, return_tensors="pt", max_length=50)
29
- with torch.no_grad():
30
- outputs = model.generate(**inputs, max_new_tokens=20)
31
- result = tokenizer.decode(outputs[0], skip_special_tokens=True)
32
- print(f"Test successful! Output: {result}")
33
- return True
34
- return False
35
- except Exception as e:
36
- print(f"Test failed: {e}")
37
- return False
38
-
39
- def load_model() -> Tuple[Optional[object], Optional[object]]:
40
- """Load the fine-tuned satisfaction analysis model."""
41
  global model, tokenizer
42
 
43
  if model is not None and tokenizer is not None:
44
  return model, tokenizer
45
 
46
  try:
47
- print("🔄 Loading Mistral model and tokenizer...")
48
-
49
- # Load tokenizer from base model (Mistral)
50
  tokenizer = AutoTokenizer.from_pretrained(Config.BASE_MODEL)
51
  if tokenizer.pad_token is None:
52
  tokenizer.pad_token = tokenizer.eos_token
53
- tokenizer.padding_side = "left" # Change to left padding for generation
54
 
55
- # Quantization config for efficient inference
56
- bnb_config = BitsAndBytesConfig(
57
- load_in_4bit=True,
58
- bnb_4bit_use_double_quant=True,
59
- bnb_4bit_quant_type="nf4",
60
- bnb_4bit_compute_dtype=torch.float16
61
- )
62
-
63
- # Load base Mistral model
64
- base_model = AutoModelForCausalLM.from_pretrained(
65
  Config.BASE_MODEL,
66
- quantization_config=bnb_config,
67
- device_map="auto",
68
- trust_remote_code=True,
69
- torch_dtype=torch.float16,
70
- low_cpu_mem_usage=True
71
  )
72
 
73
- # Try loading adapter from HF repo first
74
- try:
75
- model = PeftModel.from_pretrained(
76
- base_model,
77
- Config.MODEL_PATH,
78
- is_trainable=False,
79
- torch_dtype=torch.float16
80
- )
81
- print("✅ Loaded model from Hugging Face repo")
82
- except Exception as e:
83
- print(f"Could not load from HF: {e}")
84
- # Fallback to local adapter if available
85
- if os.path.exists(Config.ADAPTER_PATH):
86
- model = PeftModel.from_pretrained(
87
- base_model,
88
- Config.ADAPTER_PATH,
89
- is_trainable=False,
90
- torch_dtype=torch.float16
91
- )
92
- print("✅ Loaded model from local adapter")
93
- else:
94
- # If no adapter found, use base model
95
- model = base_model
96
- print("⚠️ Using base model without adapter")
97
-
98
  model.eval()
99
- print("✅ Mistral-7B model loaded successfully!")
100
- print(f"Device: {next(model.parameters()).device}")
101
  return model, tokenizer
102
 
103
  except Exception as e:
104
- print(f"Error loading model: {e}")
105
- import traceback
106
- traceback.print_exc()
107
- return None, None
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- def analyze_satisfaction(user_input: str) -> str:
110
- """Generate satisfaction analysis based on user input text."""
111
-
112
- if not user_input or not user_input.strip():
113
- return "⚠️ Please enter some text describing your life situation or what you'd like analyzed."
114
 
115
- # Show loading message
116
- yield "🔄 Loading model and analyzing your input... This may take a moment on first run."
117
-
118
- # Load model if not already loaded
119
- model, tokenizer = load_model()
120
 
121
  if model is None or tokenizer is None:
122
- yield "Error: Could not load the model. Please check the model configuration and try again."
123
- return
124
 
125
  try:
126
- yield "🔍 Processing your input..."
127
-
128
- # Prepare the prompt in Mistral format
129
- formatted_prompt = f"[INST] {user_input} [/INST]"
130
 
131
- # Tokenize input
132
  inputs = tokenizer(
133
- formatted_prompt,
134
  return_tensors="pt",
135
  truncation=True,
136
- max_length=Config.MAX_INPUT_LENGTH,
137
- padding=True
138
  )
139
 
140
- # Move to GPU if available
141
- device = "cuda" if torch.cuda.is_available() else "cpu"
142
- if device == "cuda":
143
- inputs = {k: v.to(device) for k, v in inputs.items()}
144
- model.to(device)
145
-
146
- yield "💭 Generating analysis..."
147
-
148
- # Generate response
149
  with torch.no_grad():
150
  outputs = model.generate(
151
  **inputs,
152
  max_new_tokens=Config.MAX_NEW_TOKENS,
153
  temperature=Config.TEMPERATURE,
154
- top_p=Config.TOP_P,
155
  do_sample=True,
156
  pad_token_id=tokenizer.eos_token_id,
157
- eos_token_id=tokenizer.eos_token_id,
158
- repetition_penalty=1.1
159
  )
160
 
161
- # Decode response
162
- full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
163
 
164
- # Extract generated text (remove input prompt)
165
- if "[/INST]" in full_response:
166
- generated_text = full_response.split("[/INST]")[-1].strip()
167
- else:
168
- generated_text = full_response[len(formatted_prompt):].strip()
169
 
170
- # Format the output
171
- formatted_output = "## 📊 Life Satisfaction Analysis\n\n"
172
- if generated_text:
173
- formatted_output += generated_text
174
- else:
175
- formatted_output += "I apologize, but I couldn't generate a proper analysis. Please try rephrasing your input or provide more details about your life situation."
176
 
177
- yield formatted_output
178
 
179
  except Exception as e:
180
- error_msg = f"Error during analysis: {str(e)}\n\n"
181
- error_msg += "**Troubleshooting tips:**\n"
182
- error_msg += "- Make sure the model is properly uploaded to Hugging Face\n"
183
- error_msg += "- Check if the Space has enough resources (GPU/CPU)\n"
184
- error_msg += "- Try with a shorter input text\n"
185
- error_msg += f"- Current device: {'GPU' if torch.cuda.is_available() else 'CPU'}"
186
- yield error_msg
187
 
188
- # Example prompts for users
189
- EXAMPLE_PROMPTS = [
190
- "I'm a 29-year-old professional feeling burned out at work. My health is okay but I rarely exercise. Financially stable but not saving much. Great relationship with my partner. What's my life satisfaction score?",
191
- "Rate my life satisfaction: Work is stressful (3/10), health is good (7/10), finances are tight (4/10), relationships are excellent (9/10). Give me a comprehensive analysis.",
192
- "Analyze my satisfaction: Career going well, making good money, but no time for friends or hobbies. Always tired and stressed. How can I improve?",
193
- "I'm happy with my job and relationships but struggling with debt and health issues. Need advice on balancing everything.",
194
- "Just graduated, starting my career, living paycheck to paycheck, single but happy, very healthy and active. Analyze my life satisfaction."
195
- ]
196
-
197
- # Gradio Interface
198
- def create_interface():
199
- """Create the Gradio interface."""
200
 
201
- with gr.Blocks(title="Life Satisfaction Analysis", theme=gr.themes.Soft()) as demo:
202
- gr.Markdown(
203
- """
204
- # 🌟 AI Life Satisfaction Analyzer
205
-
206
- This AI-powered tool analyzes your life satisfaction based on your description of your current situation.
207
- Simply describe your life circumstances, challenges, and satisfaction levels across different areas.
208
-
209
- **The AI will analyze:**
210
- - Overall life satisfaction score
211
- - Balance across life domains (work, health, finances, relationships)
212
- - Personalized recommendations for improvement
213
- - Action plans and strategies
214
- """
215
- )
216
-
217
- with gr.Row():
218
- with gr.Column():
219
- # Input section
220
- input_text = gr.Textbox(
221
- label="📝 Describe Your Current Life Situation",
222
- placeholder="Tell me about your work, health, finances, relationships, and any other aspects of your life you'd like analyzed. You can include satisfaction ratings (1-10) or just describe how you feel about each area.",
223
- lines=8,
224
- max_lines=15
225
- )
226
-
227
- with gr.Row():
228
- analyze_btn = gr.Button("🔍 Analyze My Life Satisfaction", variant="primary", scale=2)
229
- clear_btn = gr.Button("🗑️ Clear", scale=1)
230
-
231
- # Examples section
232
- gr.Markdown("### 💡 Example Inputs")
233
- example_dropdown = gr.Dropdown(
234
- choices=EXAMPLE_PROMPTS,
235
- label="Select an example to try:",
236
- interactive=True
237
- )
238
-
239
- with gr.Row():
240
- with gr.Column():
241
- # Output section
242
- output = gr.Markdown(label="Analysis Results")
243
-
244
- # Event handlers
245
- analyze_btn.click(
246
- fn=analyze_satisfaction,
247
- inputs=input_text,
248
- outputs=output
249
- )
250
-
251
- clear_btn.click(
252
- fn=lambda: ("", ""),
253
- inputs=[],
254
- outputs=[input_text, output]
255
- )
256
-
257
- example_dropdown.change(
258
- fn=lambda x: x,
259
- inputs=example_dropdown,
260
- outputs=input_text
261
- )
262
-
263
- # Tips section
264
- with gr.Accordion("📖 Tips for Best Results", open=False):
265
- gr.Markdown(
266
- """
267
- **How to get the most accurate analysis:**
268
-
269
- 1. **Be specific** about your situation in each life area
270
- 2. **Include ratings** (1-10) if you want quantified analysis
271
- 3. **Mention your age** and life stage for context
272
- 4. **Describe challenges** you're facing
273
- 5. **Share your goals** or what you'd like to improve
274
-
275
- **Example format:**
276
- - Work: [Your situation and satisfaction level]
277
- - Health: [Physical and mental wellness status]
278
- - Finances: [Financial situation and concerns]
279
- - Relationships: [Social and romantic relationships]
280
- - Personal: [Hobbies, growth, fulfillment]
281
- """
282
  )
 
283
 
284
- # Footer
285
- gr.Markdown(
286
- """
287
- ---
288
- 💡 **Disclaimer:** This AI tool provides general insights based on the information you provide.
289
- For professional advice, please consult qualified experts in relevant fields.
290
-
291
- 🔒 **Privacy:** Your input is processed in real-time and not stored.
292
- """
293
- )
 
 
 
 
 
 
294
 
295
- return demo
 
 
 
 
296
 
297
- # Launch the app
298
  if __name__ == "__main__":
299
- # Check environment
300
- print("🚀 Starting Life Satisfaction Analysis Tool...")
301
- print(f"PyTorch version: {torch.__version__}")
302
- print(f"CUDA available: {torch.cuda.is_available()}")
303
- if torch.cuda.is_available():
304
- print(f"CUDA device: {torch.cuda.get_device_name(0)}")
305
-
306
- # Try to load model on startup (but don't fail if it doesn't work)
307
- try:
308
- load_model()
309
- except Exception as e:
310
- print(f"Note: Model will be loaded on first use. Error: {e}")
311
-
312
- # Create and launch interface
313
- demo = create_interface()
314
- demo.queue() # Enable queue for streaming
315
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
4
  import os
 
5
 
6
# Configuration tuned for CPU-only inference.
class Config:
    """Static configuration constants for model loading and generation."""

    MODEL_PATH = "navidfalah/3ai"                       # fine-tuned repo (currently unused by the loader)
    BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"   # base model actually loaded
    MAX_NEW_TOKENS = 150    # short generations keep CPU latency tolerable
    TEMPERATURE = 0.7       # sampling temperature
    TOP_P = 0.9             # nucleus-sampling cutoff
    MAX_INPUT_LENGTH = 256  # truncate long inputs for faster processing

# Module-level cache: populated on first successful load, reused afterwards.
model = None
tokenizer = None
 
19
def load_model_cpu_optimized():
    """Load the base model and tokenizer for CPU inference, caching globally.

    Returns:
        (model, tokenizer) on success, or (None, None) if both the primary
        model and the GPT-2 fallback fail to load.
    """
    global model, tokenizer

    # Reuse the cached pair so the heavy download/load happens only once.
    if model is not None and tokenizer is not None:
        return model, tokenizer

    try:
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(Config.BASE_MODEL)
        if tokenizer.pad_token is None:
            # Mistral's tokenizer ships without a pad token; reuse EOS.
            tokenizer.pad_token = tokenizer.eos_token

        print("Loading model for CPU...")
        # float32 for CPU: 4-bit/half-precision paths assume GPU kernels.
        model = AutoModelForCausalLM.from_pretrained(
            Config.BASE_MODEL,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            device_map="cpu",  # force CPU placement
        )

        model.eval()
        print("✅ Model loaded on CPU")
        return model, tokenizer

    except Exception as e:
        print(f"Error loading model: {e}")
        # Fall back to a much smaller model so the app stays usable.
        try:
            print("Trying smaller model fallback...")
            model = AutoModelForCausalLM.from_pretrained(
                "gpt2",  # much smaller fallback model
                torch_dtype=torch.float32,
            )
            tokenizer = AutoTokenizer.from_pretrained("gpt2")
            tokenizer.pad_token = tokenizer.eos_token
            model.eval()
            print("✅ Loaded fallback model (GPT-2)")
            return model, tokenizer
        except Exception as fallback_err:
            # Was a bare `except:` that silently swallowed every error
            # (including KeyboardInterrupt). Log it, and reset the cache so
            # a later call retries cleanly instead of finding a
            # half-initialized state (tokenizer set but model None).
            print(f"Fallback model failed to load: {fallback_err}")
            model = None
            tokenizer = None
            return None, None
61
 
62
def analyze_text(user_input):
    """Generate a brief life-satisfaction analysis for the given text.

    Args:
        user_input: Free-form description of the user's situation (may be
            None or empty, in which case a prompt-for-input message is
            returned).

    Returns:
        The model's generated analysis as a string, or a human-readable
        error message if the model could not be loaded or generation failed.
    """
    # Guard None as well as empty/whitespace input (None.strip() would raise).
    if not user_input or not user_input.strip():
        return "Please enter some text to analyze."

    model, tokenizer = load_model_cpu_optimized()

    if model is None or tokenizer is None:
        return "Error: Could not load model. Please try again."

    try:
        # Simple instruction-style prompt - no chat template needed.
        prompt = f"Analyze this life situation and provide brief advice: {user_input}\n\nAnalysis:"

        # Tokenize with minimal length for speed.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=Config.MAX_INPUT_LENGTH,
        )

        # Sampling settings tuned for speed on CPU. Fixes two issues:
        # - Config.TOP_P was defined but never passed to generate().
        # - early_stopping=True is only meaningful with beam search
        #   (num_beams > 1); with num_beams=1 it just emits a warning.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=Config.MAX_NEW_TOKENS,
                temperature=Config.TEMPERATURE,
                top_p=Config.TOP_P,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                num_beams=1,  # no beam search for speed
            )

        # Strip the prompt by token count rather than by string length:
        # decode(encode(prompt)) is not guaranteed to reproduce the prompt
        # byte-for-byte, which made slicing with len(prompt) fragile.
        prompt_len = inputs["input_ids"].shape[1]
        result = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()

        if not result:
            result = "Analysis: Based on your input, I recommend focusing on balance and gradual improvements."

        return result

    except Exception as e:
        return f"Error: {str(e)}"
 
 
 
 
 
 
108
 
109
# Simple Gradio Interface: one input column, one output column.
# NOTE: statement order inside the Blocks context defines the rendered
# layout, so the construction order below is significant.
with gr.Blocks(title="Quick Life Analysis", css="footer {display: none !important}") as demo:
    gr.Markdown("# Quick Life Satisfaction Analysis")
    gr.Markdown("Enter your situation and get instant AI advice (optimized for CPU)")

    with gr.Row():
        with gr.Column():
            # Free-form description of the user's situation.
            input_text = gr.Textbox(
                label="Your Input",
                placeholder="Example: I'm stressed at work (3/10) but happy with family (8/10)...",
                lines=4
            )
            submit_btn = gr.Button("Analyze", variant="primary")

        with gr.Column():
            # Read-only box that receives the model's analysis.
            output_text = gr.Textbox(
                label="AI Analysis",
                lines=6,
                interactive=False
            )

    # Clickable sample prompts that populate the input box.
    gr.Examples(
        examples=[
            "Work stress is high, health is okay, finances tight",
            "Happy with job but no work-life balance",
            "Good health and relationships but career is stagnant"
        ],
        inputs=input_text
    )

    # Wire the button to the analysis function defined above.
    submit_btn.click(
        fn=analyze_text,
        inputs=input_text,
        outputs=output_text
    )
145
 
 
146
if __name__ == "__main__":
    # The model is loaded lazily on the first request, so the very first
    # generation includes the full model-load time.
    print("Starting CPU-optimized app...")
    print("Note: First generation will be slow due to model loading")
    demo.launch()