navidfalah committed on
Commit
e0b652f
·
verified ·
1 Parent(s): 608b95d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -253
app.py CHANGED
@@ -1,315 +1,149 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
4
- from peft import PeftModel
5
  import os
6
- from typing import Tuple, Optional
7
 
8
- # Configuration
9
  class Config:
10
- MODEL_PATH = "navidfalah/3ai" # Your HF model repo
11
- BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1" # Mistral base model
12
- ADAPTER_PATH = "./model" # Local adapter path if needed
13
- MAX_NEW_TOKENS = 1000 # Reduced for faster response
14
  TEMPERATURE = 0.7
15
  TOP_P = 0.9
16
- MAX_INPUT_LENGTH = 512 # Reduced for faster processing
17
 
18
- # Global variables for model and tokenizer
19
  model = None
20
  tokenizer = None
21
 
22
- def test_model():
23
- """Simple test function to check if model is working."""
24
- try:
25
- model, tokenizer = load_model()
26
- if model and tokenizer:
27
- test_input = "Test: Rate my satisfaction with work at 5/10"
28
- inputs = tokenizer(test_input, return_tensors="pt", max_length=50)
29
- with torch.no_grad():
30
- outputs = model.generate(**inputs, max_new_tokens=20)
31
- result = tokenizer.decode(outputs[0], skip_special_tokens=True)
32
- print(f"Test successful! Output: {result}")
33
- return True
34
- return False
35
- except Exception as e:
36
- print(f"Test failed: {e}")
37
- return False
38
-
39
- def load_model() -> Tuple[Optional[object], Optional[object]]:
40
- """Load the fine-tuned satisfaction analysis model."""
41
  global model, tokenizer
42
 
43
  if model is not None and tokenizer is not None:
44
  return model, tokenizer
45
 
46
  try:
47
- print("🔄 Loading Mistral model and tokenizer...")
48
-
49
- # Load tokenizer from base model (Mistral)
50
  tokenizer = AutoTokenizer.from_pretrained(Config.BASE_MODEL)
51
  if tokenizer.pad_token is None:
52
  tokenizer.pad_token = tokenizer.eos_token
53
- tokenizer.padding_side = "left" # Change to left padding for generation
54
 
55
- # Quantization config for efficient inference
56
- bnb_config = BitsAndBytesConfig(
57
- load_in_4bit=True,
58
- bnb_4bit_use_double_quant=True,
59
- bnb_4bit_quant_type="nf4",
60
- bnb_4bit_compute_dtype=torch.float16
61
- )
62
-
63
- # Load base Mistral model
64
- base_model = AutoModelForCausalLM.from_pretrained(
65
  Config.BASE_MODEL,
66
- quantization_config=bnb_config,
67
- device_map="auto",
68
- trust_remote_code=True,
69
- torch_dtype=torch.float16,
70
- low_cpu_mem_usage=True
71
  )
72
 
73
- # Try loading adapter from HF repo first
74
- try:
75
- model = PeftModel.from_pretrained(
76
- base_model,
77
- Config.MODEL_PATH,
78
- is_trainable=False,
79
- torch_dtype=torch.float16
80
- )
81
- print("✅ Loaded model from Hugging Face repo")
82
- except Exception as e:
83
- print(f"Could not load from HF: {e}")
84
- # Fallback to local adapter if available
85
- if os.path.exists(Config.ADAPTER_PATH):
86
- model = PeftModel.from_pretrained(
87
- base_model,
88
- Config.ADAPTER_PATH,
89
- is_trainable=False,
90
- torch_dtype=torch.float16
91
- )
92
- print("✅ Loaded model from local adapter")
93
- else:
94
- # If no adapter found, use base model
95
- model = base_model
96
- print("⚠️ Using base model without adapter")
97
-
98
  model.eval()
99
- print("✅ Mistral-7B model loaded successfully!")
100
- print(f"Device: {next(model.parameters()).device}")
101
  return model, tokenizer
102
 
103
  except Exception as e:
104
- print(f"Error loading model: {e}")
105
- import traceback
106
- traceback.print_exc()
107
- return None, None
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- def analyze_satisfaction(user_input: str) -> str:
110
- """Generate satisfaction analysis based on user input text."""
111
-
112
- if not user_input or not user_input.strip():
113
- return "⚠️ Please enter some text describing your life situation or what you'd like analyzed."
114
 
115
- # Show loading message
116
- yield "🔄 Loading model and analyzing your input... This may take a moment on first run."
117
-
118
- # Load model if not already loaded
119
- model, tokenizer = load_model()
120
 
121
  if model is None or tokenizer is None:
122
- yield "Error: Could not load the model. Please check the model configuration and try again."
123
- return
124
 
125
  try:
126
- yield "🔍 Processing your input..."
127
-
128
- # Prepare the prompt in Mistral format
129
- formatted_prompt = f"[INST] {user_input} [/INST]"
130
 
131
- # Tokenize input
132
  inputs = tokenizer(
133
- formatted_prompt,
134
  return_tensors="pt",
135
  truncation=True,
136
- max_length=Config.MAX_INPUT_LENGTH,
137
- padding=True
138
  )
139
 
140
- # Move to GPU if available
141
- device = "cuda" if torch.cuda.is_available() else "cpu"
142
- if device == "cuda":
143
- inputs = {k: v.to(device) for k, v in inputs.items()}
144
- model.to(device)
145
-
146
- yield "💭 Generating analysis..."
147
-
148
- # Generate response
149
  with torch.no_grad():
150
  outputs = model.generate(
151
  **inputs,
152
  max_new_tokens=Config.MAX_NEW_TOKENS,
153
  temperature=Config.TEMPERATURE,
154
- top_p=Config.TOP_P,
155
  do_sample=True,
156
  pad_token_id=tokenizer.eos_token_id,
157
- eos_token_id=tokenizer.eos_token_id,
158
- repetition_penalty=1.1
159
  )
160
 
161
- # Decode response
162
- full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
163
 
164
- # Extract generated text (remove input prompt)
165
- if "[/INST]" in full_response:
166
- generated_text = full_response.split("[/INST]")[-1].strip()
167
- else:
168
- generated_text = full_response[len(formatted_prompt):].strip()
169
 
170
- # Format the output
171
- formatted_output = "## 📊 Life Satisfaction Analysis\n\n"
172
- if generated_text:
173
- formatted_output += generated_text
174
- else:
175
- formatted_output += "I apologize, but I couldn't generate a proper analysis. Please try rephrasing your input or provide more details about your life situation."
176
 
177
- yield formatted_output
178
 
179
  except Exception as e:
180
- error_msg = f"Error during analysis: {str(e)}\n\n"
181
- error_msg += "**Troubleshooting tips:**\n"
182
- error_msg += "- Make sure the model is properly uploaded to Hugging Face\n"
183
- error_msg += "- Check if the Space has enough resources (GPU/CPU)\n"
184
- error_msg += "- Try with a shorter input text\n"
185
- error_msg += f"- Current device: {'GPU' if torch.cuda.is_available() else 'CPU'}"
186
- yield error_msg
187
 
188
- # Example prompts for users
189
- EXAMPLE_PROMPTS = [
190
- "I'm a 29-year-old professional feeling burned out at work. My health is okay but I rarely exercise. Financially stable but not saving much. Great relationship with my partner. What's my life satisfaction score?",
191
- "Rate my life satisfaction: Work is stressful (3/10), health is good (7/10), finances are tight (4/10), relationships are excellent (9/10). Give me a comprehensive analysis.",
192
- "Analyze my satisfaction: Career going well, making good money, but no time for friends or hobbies. Always tired and stressed. How can I improve?",
193
- "I'm happy with my job and relationships but struggling with debt and health issues. Need advice on balancing everything.",
194
- "Just graduated, starting my career, living paycheck to paycheck, single but happy, very healthy and active. Analyze my life satisfaction."
195
- ]
196
-
197
- # Gradio Interface
198
- def create_interface():
199
- """Create the Gradio interface."""
200
 
201
- with gr.Blocks(title="Life Satisfaction Analysis", theme=gr.themes.Soft()) as demo:
202
- gr.Markdown(
203
- """
204
- # 🌟 AI Life Satisfaction Analyzer
205
-
206
- This AI-powered tool analyzes your life satisfaction based on your description of your current situation.
207
- Simply describe your life circumstances, challenges, and satisfaction levels across different areas.
208
-
209
- **The AI will analyze:**
210
- - Overall life satisfaction score
211
- - Balance across life domains (work, health, finances, relationships)
212
- - Personalized recommendations for improvement
213
- - Action plans and strategies
214
- """
215
- )
216
-
217
- with gr.Row():
218
- with gr.Column():
219
- # Input section
220
- input_text = gr.Textbox(
221
- label="📝 Describe Your Current Life Situation",
222
- placeholder="Tell me about your work, health, finances, relationships, and any other aspects of your life you'd like analyzed. You can include satisfaction ratings (1-10) or just describe how you feel about each area.",
223
- lines=8,
224
- max_lines=15
225
- )
226
-
227
- with gr.Row():
228
- analyze_btn = gr.Button("🔍 Analyze My Life Satisfaction", variant="primary", scale=2)
229
- clear_btn = gr.Button("🗑️ Clear", scale=1)
230
-
231
- # Examples section
232
- gr.Markdown("### 💡 Example Inputs")
233
- example_dropdown = gr.Dropdown(
234
- choices=EXAMPLE_PROMPTS,
235
- label="Select an example to try:",
236
- interactive=True
237
- )
238
-
239
- with gr.Row():
240
- with gr.Column():
241
- # Output section
242
- output = gr.Markdown(label="Analysis Results")
243
-
244
- # Event handlers
245
- analyze_btn.click(
246
- fn=analyze_satisfaction,
247
- inputs=input_text,
248
- outputs=output
249
- )
250
-
251
- clear_btn.click(
252
- fn=lambda: ("", ""),
253
- inputs=[],
254
- outputs=[input_text, output]
255
- )
256
-
257
- example_dropdown.change(
258
- fn=lambda x: x,
259
- inputs=example_dropdown,
260
- outputs=input_text
261
- )
262
-
263
- # Tips section
264
- with gr.Accordion("📖 Tips for Best Results", open=False):
265
- gr.Markdown(
266
- """
267
- **How to get the most accurate analysis:**
268
-
269
- 1. **Be specific** about your situation in each life area
270
- 2. **Include ratings** (1-10) if you want quantified analysis
271
- 3. **Mention your age** and life stage for context
272
- 4. **Describe challenges** you're facing
273
- 5. **Share your goals** or what you'd like to improve
274
-
275
- **Example format:**
276
- - Work: [Your situation and satisfaction level]
277
- - Health: [Physical and mental wellness status]
278
- - Finances: [Financial situation and concerns]
279
- - Relationships: [Social and romantic relationships]
280
- - Personal: [Hobbies, growth, fulfillment]
281
- """
282
  )
 
283
 
284
- # Footer
285
- gr.Markdown(
286
- """
287
- ---
288
- 💡 **Disclaimer:** This AI tool provides general insights based on the information you provide.
289
- For professional advice, please consult qualified experts in relevant fields.
290
-
291
- 🔒 **Privacy:** Your input is processed in real-time and not stored.
292
- """
293
- )
 
 
 
 
 
 
294
 
295
- return demo
 
 
 
 
296
 
297
- # Launch the app
298
  if __name__ == "__main__":
299
- # Check environment
300
- print("🚀 Starting Life Satisfaction Analysis Tool...")
301
- print(f"PyTorch version: {torch.__version__}")
302
- print(f"CUDA available: {torch.cuda.is_available()}")
303
- if torch.cuda.is_available():
304
- print(f"CUDA device: {torch.cuda.get_device_name(0)}")
305
-
306
- # Try to load model on startup (but don't fail if it doesn't work)
307
- try:
308
- load_model()
309
- except Exception as e:
310
- print(f"Note: Model will be loaded on first use. Error: {e}")
311
-
312
- # Create and launch interface
313
- demo = create_interface()
314
- demo.queue() # Enable queue for streaming
315
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
4
  import os
 
5
 
6
# Configuration tuned for CPU-only inference.
class Config:
    """Static configuration constants for model loading and generation."""

    MODEL_PATH = "navidfalah/3ai"                       # fine-tuned repo (currently unused by the loader)
    BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"   # base model actually loaded
    MAX_NEW_TOKENS = 150    # short generations keep CPU latency tolerable
    TEMPERATURE = 0.7       # sampling temperature
    TOP_P = 0.9             # nucleus-sampling cutoff
    MAX_INPUT_LENGTH = 256  # truncate long inputs for faster processing

# Module-level cache: populated on first successful load, reused afterwards.
model = None
tokenizer = None
 
19
def load_model_cpu_optimized():
    """Load the base model and tokenizer for CPU inference, caching globally.

    Returns:
        (model, tokenizer) on success, or (None, None) if both the primary
        model and the GPT-2 fallback fail to load.
    """
    global model, tokenizer

    # Reuse the cached pair so the heavy download/load happens only once.
    if model is not None and tokenizer is not None:
        return model, tokenizer

    try:
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(Config.BASE_MODEL)
        if tokenizer.pad_token is None:
            # Mistral's tokenizer ships without a pad token; reuse EOS.
            tokenizer.pad_token = tokenizer.eos_token

        print("Loading model for CPU...")
        # float32 for CPU: 4-bit/half-precision paths assume GPU kernels.
        model = AutoModelForCausalLM.from_pretrained(
            Config.BASE_MODEL,
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            device_map="cpu",  # force CPU placement
        )

        model.eval()
        print("✅ Model loaded on CPU")
        return model, tokenizer

    except Exception as e:
        print(f"Error loading model: {e}")
        # Fall back to a much smaller model so the app stays usable.
        try:
            print("Trying smaller model fallback...")
            model = AutoModelForCausalLM.from_pretrained(
                "gpt2",  # much smaller fallback model
                torch_dtype=torch.float32,
            )
            tokenizer = AutoTokenizer.from_pretrained("gpt2")
            tokenizer.pad_token = tokenizer.eos_token
            model.eval()
            print("✅ Loaded fallback model (GPT-2)")
            return model, tokenizer
        except Exception as fallback_err:
            # Was a bare `except:` that silently swallowed every error
            # (including KeyboardInterrupt). Log it, and reset the cache so
            # a later call retries cleanly instead of finding a
            # half-initialized state (tokenizer set but model None).
            print(f"Fallback model failed to load: {fallback_err}")
            model = None
            tokenizer = None
            return None, None
61
 
62
def analyze_text(user_input):
    """Generate a brief life-satisfaction analysis for the given text.

    Args:
        user_input: Free-form description of the user's situation (may be
            None or empty, in which case a prompt-for-input message is
            returned).

    Returns:
        The model's generated analysis as a string, or a human-readable
        error message if the model could not be loaded or generation failed.
    """
    # Guard None as well as empty/whitespace input (None.strip() would raise).
    if not user_input or not user_input.strip():
        return "Please enter some text to analyze."

    model, tokenizer = load_model_cpu_optimized()

    if model is None or tokenizer is None:
        return "Error: Could not load model. Please try again."

    try:
        # Simple instruction-style prompt - no chat template needed.
        prompt = f"Analyze this life situation and provide brief advice: {user_input}\n\nAnalysis:"

        # Tokenize with minimal length for speed.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=Config.MAX_INPUT_LENGTH,
        )

        # Sampling settings tuned for speed on CPU. Fixes two issues:
        # - Config.TOP_P was defined but never passed to generate().
        # - early_stopping=True is only meaningful with beam search
        #   (num_beams > 1); with num_beams=1 it just emits a warning.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=Config.MAX_NEW_TOKENS,
                temperature=Config.TEMPERATURE,
                top_p=Config.TOP_P,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                num_beams=1,  # no beam search for speed
            )

        # Strip the prompt by token count rather than by string length:
        # decode(encode(prompt)) is not guaranteed to reproduce the prompt
        # byte-for-byte, which made slicing with len(prompt) fragile.
        prompt_len = inputs["input_ids"].shape[1]
        result = tokenizer.decode(
            outputs[0][prompt_len:], skip_special_tokens=True
        ).strip()

        if not result:
            result = "Analysis: Based on your input, I recommend focusing on balance and gradual improvements."

        return result

    except Exception as e:
        return f"Error: {str(e)}"
 
 
 
 
 
 
108
 
109
# Simple Gradio Interface: one input column, one output column.
# NOTE: statement order inside the Blocks context defines the rendered
# layout, so the construction order below is significant.
with gr.Blocks(title="Quick Life Analysis", css="footer {display: none !important}") as demo:
    gr.Markdown("# Quick Life Satisfaction Analysis")
    gr.Markdown("Enter your situation and get instant AI advice (optimized for CPU)")

    with gr.Row():
        with gr.Column():
            # Free-form description of the user's situation.
            input_text = gr.Textbox(
                label="Your Input",
                placeholder="Example: I'm stressed at work (3/10) but happy with family (8/10)...",
                lines=4
            )
            submit_btn = gr.Button("Analyze", variant="primary")

        with gr.Column():
            # Read-only box that receives the model's analysis.
            output_text = gr.Textbox(
                label="AI Analysis",
                lines=6,
                interactive=False
            )

    # Clickable sample prompts that populate the input box.
    gr.Examples(
        examples=[
            "Work stress is high, health is okay, finances tight",
            "Happy with job but no work-life balance",
            "Good health and relationships but career is stagnant"
        ],
        inputs=input_text
    )

    # Wire the button to the analysis function defined above.
    submit_btn.click(
        fn=analyze_text,
        inputs=input_text,
        outputs=output_text
    )
145
 
 
146
if __name__ == "__main__":
    # The model is loaded lazily on the first request, so the very first
    # generation includes the full model-load time.
    print("Starting CPU-optimized app...")
    print("Note: First generation will be slow due to model loading")
    demo.launch()