navidfalah committed on
Commit
fc8391d
·
verified ·
1 Parent(s): a2b8ec8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +283 -245
app.py CHANGED
@@ -1,292 +1,330 @@
1
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
- from huggingface_hub import login
5
  import os
6
- import subprocess
7
- import sys
8
-
9
- print("Starting 3AI application...")
10
 
11
- # Install required dependencies
12
- print("Installing required dependencies...")
13
- try:
14
- subprocess.check_call([sys.executable, "-m", "pip", "install", "sentencepiece", "protobuf", "peft", "--quiet"])
15
- print("Dependencies installed successfully!")
16
- except Exception as e:
17
- print(f"Warning: Could not install dependencies: {e}")
18
-
19
- # Import PEFT after installation
20
- try:
21
- from peft import PeftModel, PeftConfig
22
- print("PEFT imported successfully!")
23
- except ImportError as e:
24
- print(f"Could not import PEFT: {e}")
25
- print("Trying to install PEFT again...")
26
- try:
27
- subprocess.check_call([sys.executable, "-m", "pip", "install", "peft", "--force-reinstall"])
28
- from peft import PeftModel, PeftConfig
29
- print("PEFT installed and imported successfully!")
30
- except Exception as e2:
31
- print(f"Failed to install PEFT: {e2}")
32
- print("Continuing without PEFT - will try alternative approach")
33
- PeftModel = None
34
- PeftConfig = None
35
-
36
- # Login using the secret token
37
- token = os.getenv("HF_TOKEN")
38
- if token:
39
- login(token=token)
40
- print("Successfully logged in to Hugging Face!")
41
-
42
- # Use your own Hugging Face model
43
- original_mistral_model = "navidfalah/3ai" # Your model on Hugging Face
44
- adapter_path = "./model" # Your local LoRA adapter directory (if available)
45
 
46
- print(f"Loading original Mistral tokenizer from {original_mistral_model}...")
47
- try:
48
- # First try: Load with slow tokenizer from your model
49
- tokenizer = AutoTokenizer.from_pretrained(
50
- original_mistral_model,
51
- use_fast=False, # Use slow tokenizer to avoid issues
52
- force_download=True, # Force fresh download
53
- resume_download=False
54
- )
55
- print("Your model tokenizer loaded successfully!")
56
- except Exception as e:
57
- print(f"Error loading tokenizer from your model: {e}")
58
  try:
59
- # Second try: Use original Mistral tokenizer
60
- tokenizer = AutoTokenizer.from_pretrained(
61
- "mistralai/Mistral-7B-Instruct-v0.1",
62
- use_fast=False
63
- )
64
- print("Original Mistral tokenizer loaded successfully!")
65
- except Exception as e2:
66
- print(f"Error with original Mistral: {e2}")
67
- try:
68
- # Third try: Use different Mistral model version
69
- print("Trying Mistral-7B-Instruct-v0.2...")
70
- tokenizer = AutoTokenizer.from_pretrained(
71
- "mistralai/Mistral-7B-Instruct-v0.2",
72
- use_fast=False
73
- )
74
- print("Mistral v0.2 tokenizer loaded successfully!")
75
- except Exception as e3:
76
- print(f"Error with Mistral v0.2: {e3}")
77
- try:
78
- # Fourth try: Use compatible tokenizer
79
- print("Trying compatible tokenizer...")
80
- tokenizer = AutoTokenizer.from_pretrained(
81
- "microsoft/DialoGPT-medium",
82
- use_fast=False
83
- )
84
- print("Compatible tokenizer loaded successfully!")
85
- except Exception as e4:
86
- print(f"Error with compatible tokenizer: {e4}")
87
- try:
88
- # Fifth try: Use GPT-2 as fallback
89
- print("Using GPT-2 as fallback...")
90
- tokenizer = AutoTokenizer.from_pretrained("gpt2")
91
- print("GPT-2 tokenizer loaded successfully!")
92
- except Exception as e5:
93
- print(f"Cannot load any tokenizer: {e5}")
94
- print("Exiting - cannot proceed without tokenizer")
95
- exit(1)
96
-
97
- # Ensure tokenizer has proper tokens
98
- if tokenizer.pad_token is None:
99
- tokenizer.pad_token = tokenizer.eos_token
100
 
101
- print(f"Loading your model from {original_mistral_model}...")
102
- try:
103
- # Load your model from Hugging Face
104
- base_model = AutoModelForCausalLM.from_pretrained(
105
- original_mistral_model,
106
- torch_dtype=torch.float16,
107
- device_map="auto",
108
- low_cpu_mem_usage=True
109
- )
110
- print("Your model loaded successfully!")
111
 
112
- # Check if PEFT is available and try to load local adapter
113
- if PeftModel is not None and PeftConfig is not None:
114
- try:
115
- print(f"Trying to load local LoRA adapter from {adapter_path}...")
116
- model = PeftModel.from_pretrained(
117
- base_model,
118
- adapter_path,
119
- torch_dtype=torch.float16
120
- )
121
- print("Local LoRA adapter loaded successfully!")
122
- except Exception as adapter_error:
123
- print(f"Could not load local adapter: {adapter_error}")
124
- print("Using your base model without additional adapter")
125
- model = base_model
126
- else:
127
- print("PEFT not available - using your base model")
128
- model = base_model
129
 
130
- except Exception as e:
131
- print(f"Error loading your model: {e}")
132
- print("Trying to load original Mistral as fallback...")
133
  try:
134
- # Fallback to original Mistral
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  base_model = AutoModelForCausalLM.from_pretrained(
136
- "mistralai/Mistral-7B-Instruct-v0.1",
137
- torch_dtype=torch.float16,
138
  device_map="auto",
 
 
139
  low_cpu_mem_usage=True
140
  )
141
- print("Fallback Mistral model loaded!")
142
- model = base_model
143
- except Exception as e2:
144
- print(f"Cannot load any model: {e2}")
145
- print("Exiting - cannot proceed without model")
146
- exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- def chat_function(message):
149
- if not message or not message.strip():
150
- return "Please enter a message."
 
 
151
 
152
- # Clean and limit input
153
- message = message.strip()
154
- if len(message) > 500:
155
- return "Message too long! Please keep it under 500 characters."
 
 
 
 
 
156
 
157
  try:
158
- # Use flexible prompt format based on tokenizer type
159
- if hasattr(tokenizer, 'chat_template') or 'mistral' in tokenizer.name_or_path.lower():
160
- # Use Mistral format if it's actually Mistral
161
- prompt = f"<s>[INST] {message} [/INST]"
162
- else:
163
- # Use simple format for other tokenizers
164
- prompt = f"User: {message}\nAssistant:"
165
 
166
  # Tokenize input
167
  inputs = tokenizer(
168
- prompt,
169
- return_tensors='pt',
170
  truncation=True,
171
- max_length=400,
172
  padding=True
173
  )
174
- input_ids = inputs['input_ids']
175
- attention_mask = inputs.get('attention_mask', None)
176
 
177
- # Move to model device
178
- device = next(model.parameters()).device
179
- input_ids = input_ids.to(device)
180
- if attention_mask is not None:
181
- attention_mask = attention_mask.to(device)
 
 
182
 
183
  # Generate response
184
  with torch.no_grad():
185
- if torch.cuda.is_available():
186
- torch.cuda.empty_cache()
187
-
188
  outputs = model.generate(
189
- input_ids,
190
- max_new_tokens=200,
191
- temperature=0.7,
 
192
  do_sample=True,
193
- top_p=0.9,
194
- pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id else tokenizer.eos_token_id,
195
  eos_token_id=tokenizer.eos_token_id,
196
- attention_mask=attention_mask,
197
  repetition_penalty=1.1
198
  )
199
 
200
- # Extract and clean response
201
- if outputs.shape[1] > input_ids.shape[1]:
202
- response_ids = outputs[0][input_ids.shape[1]:]
203
- response = tokenizer.decode(response_ids, skip_special_tokens=True)
204
- else:
205
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
206
- response = response.replace(prompt, "").strip()
207
-
208
- # Clean up response
209
- response = response.strip()
210
-
211
- # Remove prompt artifacts
212
- for artifact in ["[/INST]", "[INST]", "Assistant:", "User:", "Human:"]:
213
- if artifact in response:
214
- response = response.split(artifact)[-1].strip()
215
 
216
- # Remove input if it appears in response
217
- if message.lower() in response.lower():
218
- response = response.replace(message, "").strip()
 
 
219
 
220
- # Ensure reasonable length
221
- if len(response) > 800:
222
- response = response[:800] + "..."
 
 
 
223
 
224
- # Fallback if empty
225
- if len(response.strip()) < 3:
226
- response = "I understand. How can I help you?"
227
-
228
- return response
229
 
230
  except Exception as e:
231
- return f"Error: {str(e)}"
 
 
 
 
 
 
232
 
233
- def clear_chat():
234
- return ""
 
 
 
 
 
 
235
 
236
- # Simple custom CSS
237
- css = """
238
- .gradio-container {
239
- max-width: 700px !important;
240
- margin: auto !important;
241
- }
242
- """
243
-
244
- # Create interface
245
- with gr.Blocks(title="3AI - Text Generation", css=css, theme=gr.themes.Default()) as demo:
246
- # Header
247
- gr.Markdown("""
248
- # 🤖 3AI Text Generator
249
- *Simple text-to-text generation with your navidfalah/3ai model*
250
- """)
251
 
252
- # Input
253
- with gr.Row():
254
- input_text = gr.Textbox(
255
- placeholder="Enter your text here...",
256
- label="Input Text",
257
- lines=2,
258
- max_lines=3
 
 
 
 
 
 
 
259
  )
260
-
261
- # Generate button
262
- with gr.Row():
263
- generate_btn = gr.Button("Generate", variant="primary", size="lg")
264
-
265
- # Output
266
- with gr.Row():
267
- output_text = gr.Textbox(
268
- label="Generated Text",
269
- lines=6,
270
- max_lines=10,
271
- interactive=False,
272
- placeholder="Generated text will appear here..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  )
274
 
275
- # Event handlers
276
- generate_btn.click(
277
- fn=chat_function,
278
- inputs=input_text,
279
- outputs=output_text
280
- )
281
-
282
- input_text.submit(
283
- fn=chat_function,
284
- inputs=input_text,
285
- outputs=output_text
286
- )
287
-
288
- # Footer
289
- gr.Markdown("---\n*navidfalah/3ai • Simple Text Generation*")
290
 
 
291
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  demo.launch()
 
1
+ def test_model():
2
+ """Simple test function to check if model is working."""
3
+ try:
4
+ model, tokenizer = load_model()
5
+ if model and tokenizer:
6
+ test_input = "Test: Rate my satisfaction with work at 5/10"
7
+ inputs = tokenizer(test_input, return_tensors="pt", max_length=50)
8
+ with torch.no_grad():
9
+ outputs = model.generate(**inputs, max_new_tokens=20)
10
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
11
+ print(f"Test successful! Output: {result}")
12
+ return True
13
+ return False
14
+ except Exception as e:
15
+ print(f"Test failed: {e}")
16
+ return False
+ import gradio as gr
17
  import torch
18
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
19
+ from peft import PeftModel
20
  import os
21
+ from typing import Tuple, Optional
 
 
 
22
 
23
+ # Configuration
24
+ class Config:
25
+ MODEL_PATH = "navidfalah/3ai" # Your HF model repo
26
+ BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1" # Mistral base model
27
+ ADAPTER_PATH = "./model" # Local adapter path if needed
28
+ MAX_NEW_TOKENS = 1000 # Reduced for faster response
29
+ TEMPERATURE = 0.7
30
+ TOP_P = 0.9
31
+ MAX_INPUT_LENGTH = 512 # Reduced for faster processing
32
+
33
+ # Global variables for model and tokenizer
34
+ model = None
35
+ tokenizer = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ def test_model():
38
+ """Simple test function to check if model is working."""
 
 
 
 
 
 
 
 
 
 
39
  try:
40
+ model, tokenizer = load_model()
41
+ if model and tokenizer:
42
+ test_input = "Test: Rate my satisfaction with work at 5/10"
43
+ inputs = tokenizer(test_input, return_tensors="pt", max_length=50)
44
+ with torch.no_grad():
45
+ outputs = model.generate(**inputs, max_new_tokens=20)
46
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
47
+ print(f"Test successful! Output: {result}")
48
+ return True
49
+ return False
50
+ except Exception as e:
51
+ print(f"Test failed: {e}")
52
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ def load_model() -> Tuple[Optional[object], Optional[object]]:
55
+ """Load the fine-tuned satisfaction analysis model."""
56
+ global model, tokenizer
 
 
 
 
 
 
 
57
 
58
+ if model is not None and tokenizer is not None:
59
+ return model, tokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
 
 
 
61
  try:
62
+ print("🔄 Loading Mistral model and tokenizer...")
63
+
64
+ # Load tokenizer from base model (Mistral)
65
+ tokenizer = AutoTokenizer.from_pretrained(Config.BASE_MODEL)
66
+ if tokenizer.pad_token is None:
67
+ tokenizer.pad_token = tokenizer.eos_token
68
+ tokenizer.padding_side = "left" # Change to left padding for generation
69
+
70
+ # Quantization config for efficient inference
71
+ bnb_config = BitsAndBytesConfig(
72
+ load_in_4bit=True,
73
+ bnb_4bit_use_double_quant=True,
74
+ bnb_4bit_quant_type="nf4",
75
+ bnb_4bit_compute_dtype=torch.float16
76
+ )
77
+
78
+ # Load base Mistral model
79
  base_model = AutoModelForCausalLM.from_pretrained(
80
+ Config.BASE_MODEL,
81
+ quantization_config=bnb_config,
82
  device_map="auto",
83
+ trust_remote_code=True,
84
+ torch_dtype=torch.float16,
85
  low_cpu_mem_usage=True
86
  )
87
+
88
+ # Try loading adapter from HF repo first
89
+ try:
90
+ model = PeftModel.from_pretrained(
91
+ base_model,
92
+ Config.MODEL_PATH,
93
+ is_trainable=False,
94
+ torch_dtype=torch.float16
95
+ )
96
+ print("✅ Loaded model from Hugging Face repo")
97
+ except Exception as e:
98
+ print(f"Could not load from HF: {e}")
99
+ # Fallback to local adapter if available
100
+ if os.path.exists(Config.ADAPTER_PATH):
101
+ model = PeftModel.from_pretrained(
102
+ base_model,
103
+ Config.ADAPTER_PATH,
104
+ is_trainable=False,
105
+ torch_dtype=torch.float16
106
+ )
107
+ print("✅ Loaded model from local adapter")
108
+ else:
109
+ # If no adapter found, use base model
110
+ model = base_model
111
+ print("⚠️ Using base model without adapter")
112
+
113
+ model.eval()
114
+ print("✅ Mistral-7B model loaded successfully!")
115
+ print(f"Device: {next(model.parameters()).device}")
116
+ return model, tokenizer
117
+
118
+ except Exception as e:
119
+ print(f"❌ Error loading model: {e}")
120
+ import traceback
121
+ traceback.print_exc()
122
+ return None, None
123
 
124
+ def analyze_satisfaction(user_input: str) -> str:
125
+ """Generate satisfaction analysis based on user input text."""
126
+
127
+ if not user_input or not user_input.strip():
128
+ return "⚠️ Please enter some text describing your life situation or what you'd like analyzed."
129
 
130
+ # Show loading message
131
+ yield "🔄 Loading model and analyzing your input... This may take a moment on first run."
132
+
133
+ # Load model if not already loaded
134
+ model, tokenizer = load_model()
135
+
136
+ if model is None or tokenizer is None:
137
+ yield "❌ Error: Could not load the model. Please check the model configuration and try again."
138
+ return
139
 
140
  try:
141
+ yield "🔍 Processing your input..."
142
+
143
+ # Prepare the prompt in Mistral format
144
+ formatted_prompt = f"[INST] {user_input} [/INST]"
 
 
 
145
 
146
  # Tokenize input
147
  inputs = tokenizer(
148
+ formatted_prompt,
149
+ return_tensors="pt",
150
  truncation=True,
151
+ max_length=Config.MAX_INPUT_LENGTH,
152
  padding=True
153
  )
 
 
154
 
155
+ # Move to GPU if available
156
+ device = "cuda" if torch.cuda.is_available() else "cpu"
157
+ if device == "cuda":
158
+ inputs = {k: v.to(device) for k, v in inputs.items()}
159
+ model.to(device)
160
+
161
+ yield "💭 Generating analysis..."
162
 
163
  # Generate response
164
  with torch.no_grad():
 
 
 
165
  outputs = model.generate(
166
+ **inputs,
167
+ max_new_tokens=Config.MAX_NEW_TOKENS,
168
+ temperature=Config.TEMPERATURE,
169
+ top_p=Config.TOP_P,
170
  do_sample=True,
171
+ pad_token_id=tokenizer.eos_token_id,
 
172
  eos_token_id=tokenizer.eos_token_id,
 
173
  repetition_penalty=1.1
174
  )
175
 
176
+ # Decode response
177
+ full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
+ # Extract generated text (remove input prompt)
180
+ if "[/INST]" in full_response:
181
+ generated_text = full_response.split("[/INST]")[-1].strip()
182
+ else:
183
+ generated_text = full_response[len(formatted_prompt):].strip()
184
 
185
+ # Format the output
186
+ formatted_output = "## 📊 Life Satisfaction Analysis\n\n"
187
+ if generated_text:
188
+ formatted_output += generated_text
189
+ else:
190
+ formatted_output += "I apologize, but I couldn't generate a proper analysis. Please try rephrasing your input or provide more details about your life situation."
191
 
192
+ yield formatted_output
 
 
 
 
193
 
194
  except Exception as e:
195
+ error_msg = f"Error during analysis: {str(e)}\n\n"
196
+ error_msg += "**Troubleshooting tips:**\n"
197
+ error_msg += "- Make sure the model is properly uploaded to Hugging Face\n"
198
+ error_msg += "- Check if the Space has enough resources (GPU/CPU)\n"
199
+ error_msg += "- Try with a shorter input text\n"
200
+ error_msg += f"- Current device: {'GPU' if torch.cuda.is_available() else 'CPU'}"
201
+ yield error_msg
202
 
203
+ # Example prompts for users
204
+ EXAMPLE_PROMPTS = [
205
+ "I'm a 29-year-old professional feeling burned out at work. My health is okay but I rarely exercise. Financially stable but not saving much. Great relationship with my partner. What's my life satisfaction score?",
206
+ "Rate my life satisfaction: Work is stressful (3/10), health is good (7/10), finances are tight (4/10), relationships are excellent (9/10). Give me a comprehensive analysis.",
207
+ "Analyze my satisfaction: Career going well, making good money, but no time for friends or hobbies. Always tired and stressed. How can I improve?",
208
+ "I'm happy with my job and relationships but struggling with debt and health issues. Need advice on balancing everything.",
209
+ "Just graduated, starting my career, living paycheck to paycheck, single but happy, very healthy and active. Analyze my life satisfaction."
210
+ ]
211
 
212
+ # Gradio Interface
213
+ def create_interface():
214
+ """Create the Gradio interface."""
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
+ with gr.Blocks(title="Life Satisfaction Analysis", theme=gr.themes.Soft()) as demo:
217
+ gr.Markdown(
218
+ """
219
+ # 🌟 AI Life Satisfaction Analyzer
220
+
221
+ This AI-powered tool analyzes your life satisfaction based on your description of your current situation.
222
+ Simply describe your life circumstances, challenges, and satisfaction levels across different areas.
223
+
224
+ **The AI will analyze:**
225
+ - Overall life satisfaction score
226
+ - Balance across life domains (work, health, finances, relationships)
227
+ - Personalized recommendations for improvement
228
+ - Action plans and strategies
229
+ """
230
  )
231
+
232
+ with gr.Row():
233
+ with gr.Column():
234
+ # Input section
235
+ input_text = gr.Textbox(
236
+ label="📝 Describe Your Current Life Situation",
237
+ placeholder="Tell me about your work, health, finances, relationships, and any other aspects of your life you'd like analyzed. You can include satisfaction ratings (1-10) or just describe how you feel about each area.",
238
+ lines=8,
239
+ max_lines=15
240
+ )
241
+
242
+ with gr.Row():
243
+ analyze_btn = gr.Button("🔍 Analyze My Life Satisfaction", variant="primary", scale=2)
244
+ clear_btn = gr.Button("🗑️ Clear", scale=1)
245
+
246
+ # Examples section
247
+ gr.Markdown("### 💡 Example Inputs")
248
+ example_dropdown = gr.Dropdown(
249
+ choices=EXAMPLE_PROMPTS,
250
+ label="Select an example to try:",
251
+ interactive=True
252
+ )
253
+
254
+ with gr.Row():
255
+ with gr.Column():
256
+ # Output section
257
+ output = gr.Markdown(label="Analysis Results")
258
+
259
+ # Event handlers
260
+ analyze_btn.click(
261
+ fn=analyze_satisfaction,
262
+ inputs=input_text,
263
+ outputs=output
264
+ )
265
+
266
+ clear_btn.click(
267
+ fn=lambda: ("", ""),
268
+ inputs=[],
269
+ outputs=[input_text, output]
270
+ )
271
+
272
+ example_dropdown.change(
273
+ fn=lambda x: x,
274
+ inputs=example_dropdown,
275
+ outputs=input_text
276
+ )
277
+
278
+ # Tips section
279
+ with gr.Accordion("💡 Tips for Best Results", open=False):
280
+ gr.Markdown(
281
+ """
282
+ **How to get the most accurate analysis:**
283
+
284
+ 1. **Be specific** about your situation in each life area
285
+ 2. **Include ratings** (1-10) if you want quantified analysis
286
+ 3. **Mention your age** and life stage for context
287
+ 4. **Describe challenges** you're facing
288
+ 5. **Share your goals** or what you'd like to improve
289
+
290
+ **Example format:**
291
+ - Work: [Your situation and satisfaction level]
292
+ - Health: [Physical and mental wellness status]
293
+ - Finances: [Financial situation and concerns]
294
+ - Relationships: [Social and romantic relationships]
295
+ - Personal: [Hobbies, growth, fulfillment]
296
+ """
297
+ )
298
+
299
+ # Footer
300
+ gr.Markdown(
301
+ """
302
+ ---
303
+ 💡 **Disclaimer:** This AI tool provides general insights based on the information you provide.
304
+ For professional advice, please consult qualified experts in relevant fields.
305
+
306
+ 🔒 **Privacy:** Your input is processed in real-time and not stored.
307
+ """
308
  )
309
 
310
+ return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
+ # Launch the app
313
  if __name__ == "__main__":
314
+ # Check environment
315
+ print("🚀 Starting Life Satisfaction Analysis Tool...")
316
+ print(f"PyTorch version: {torch.__version__}")
317
+ print(f"CUDA available: {torch.cuda.is_available()}")
318
+ if torch.cuda.is_available():
319
+ print(f"CUDA device: {torch.cuda.get_device_name(0)}")
320
+
321
+ # Try to load model on startup (but don't fail if it doesn't work)
322
+ try:
323
+ load_model()
324
+ except Exception as e:
325
+ print(f"Note: Model will be loaded on first use. Error: {e}")
326
+
327
+ # Create and launch interface
328
+ demo = create_interface()
329
+ demo.queue() # Enable queue for streaming
330
  demo.launch()