navidfalah committed on
Commit
fc8391d
·
verified ·
1 Parent(s): a2b8ec8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +283 -245
app.py CHANGED
@@ -1,292 +1,330 @@
1
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
- from huggingface_hub import login
5
  import os
6
- import subprocess
7
- import sys
8
-
9
- print("Starting 3AI application...")
10
 
11
- # Install required dependencies
12
- print("Installing required dependencies...")
13
- try:
14
- subprocess.check_call([sys.executable, "-m", "pip", "install", "sentencepiece", "protobuf", "peft", "--quiet"])
15
- print("Dependencies installed successfully!")
16
- except Exception as e:
17
- print(f"Warning: Could not install dependencies: {e}")
18
-
19
- # Import PEFT after installation
20
- try:
21
- from peft import PeftModel, PeftConfig
22
- print("PEFT imported successfully!")
23
- except ImportError as e:
24
- print(f"Could not import PEFT: {e}")
25
- print("Trying to install PEFT again...")
26
- try:
27
- subprocess.check_call([sys.executable, "-m", "pip", "install", "peft", "--force-reinstall"])
28
- from peft import PeftModel, PeftConfig
29
- print("PEFT installed and imported successfully!")
30
- except Exception as e2:
31
- print(f"Failed to install PEFT: {e2}")
32
- print("Continuing without PEFT - will try alternative approach")
33
- PeftModel = None
34
- PeftConfig = None
35
-
36
- # Login using the secret token
37
- token = os.getenv("HF_TOKEN")
38
- if token:
39
- login(token=token)
40
- print("Successfully logged in to Hugging Face!")
41
-
42
- # Use your own Hugging Face model
43
- original_mistral_model = "navidfalah/3ai" # Your model on Hugging Face
44
- adapter_path = "./model" # Your local LoRA adapter directory (if available)
45
 
46
- print(f"Loading original Mistral tokenizer from {original_mistral_model}...")
47
- try:
48
- # First try: Load with slow tokenizer from your model
49
- tokenizer = AutoTokenizer.from_pretrained(
50
- original_mistral_model,
51
- use_fast=False, # Use slow tokenizer to avoid issues
52
- force_download=True, # Force fresh download
53
- resume_download=False
54
- )
55
- print("Your model tokenizer loaded successfully!")
56
- except Exception as e:
57
- print(f"Error loading tokenizer from your model: {e}")
58
  try:
59
- # Second try: Use original Mistral tokenizer
60
- tokenizer = AutoTokenizer.from_pretrained(
61
- "mistralai/Mistral-7B-Instruct-v0.1",
62
- use_fast=False
63
- )
64
- print("Original Mistral tokenizer loaded successfully!")
65
- except Exception as e2:
66
- print(f"Error with original Mistral: {e2}")
67
- try:
68
- # Third try: Use different Mistral model version
69
- print("Trying Mistral-7B-Instruct-v0.2...")
70
- tokenizer = AutoTokenizer.from_pretrained(
71
- "mistralai/Mistral-7B-Instruct-v0.2",
72
- use_fast=False
73
- )
74
- print("Mistral v0.2 tokenizer loaded successfully!")
75
- except Exception as e3:
76
- print(f"Error with Mistral v0.2: {e3}")
77
- try:
78
- # Fourth try: Use compatible tokenizer
79
- print("Trying compatible tokenizer...")
80
- tokenizer = AutoTokenizer.from_pretrained(
81
- "microsoft/DialoGPT-medium",
82
- use_fast=False
83
- )
84
- print("Compatible tokenizer loaded successfully!")
85
- except Exception as e4:
86
- print(f"Error with compatible tokenizer: {e4}")
87
- try:
88
- # Fifth try: Use GPT-2 as fallback
89
- print("Using GPT-2 as fallback...")
90
- tokenizer = AutoTokenizer.from_pretrained("gpt2")
91
- print("GPT-2 tokenizer loaded successfully!")
92
- except Exception as e5:
93
- print(f"Cannot load any tokenizer: {e5}")
94
- print("Exiting - cannot proceed without tokenizer")
95
- exit(1)
96
-
97
- # Ensure tokenizer has proper tokens
98
- if tokenizer.pad_token is None:
99
- tokenizer.pad_token = tokenizer.eos_token
100
 
101
- print(f"Loading your model from {original_mistral_model}...")
102
- try:
103
- # Load your model from Hugging Face
104
- base_model = AutoModelForCausalLM.from_pretrained(
105
- original_mistral_model,
106
- torch_dtype=torch.float16,
107
- device_map="auto",
108
- low_cpu_mem_usage=True
109
- )
110
- print("Your model loaded successfully!")
111
 
112
- # Check if PEFT is available and try to load local adapter
113
- if PeftModel is not None and PeftConfig is not None:
114
- try:
115
- print(f"Trying to load local LoRA adapter from {adapter_path}...")
116
- model = PeftModel.from_pretrained(
117
- base_model,
118
- adapter_path,
119
- torch_dtype=torch.float16
120
- )
121
- print("Local LoRA adapter loaded successfully!")
122
- except Exception as adapter_error:
123
- print(f"Could not load local adapter: {adapter_error}")
124
- print("Using your base model without additional adapter")
125
- model = base_model
126
- else:
127
- print("PEFT not available - using your base model")
128
- model = base_model
129
 
130
- except Exception as e:
131
- print(f"Error loading your model: {e}")
132
- print("Trying to load original Mistral as fallback...")
133
  try:
134
- # Fallback to original Mistral
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  base_model = AutoModelForCausalLM.from_pretrained(
136
- "mistralai/Mistral-7B-Instruct-v0.1",
137
- torch_dtype=torch.float16,
138
  device_map="auto",
 
 
139
  low_cpu_mem_usage=True
140
  )
141
- print("Fallback Mistral model loaded!")
142
- model = base_model
143
- except Exception as e2:
144
- print(f"Cannot load any model: {e2}")
145
- print("Exiting - cannot proceed without model")
146
- exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- def chat_function(message):
149
- if not message or not message.strip():
150
- return "Please enter a message."
 
 
151
 
152
- # Clean and limit input
153
- message = message.strip()
154
- if len(message) > 500:
155
- return "Message too long! Please keep it under 500 characters."
 
 
 
 
 
156
 
157
  try:
158
- # Use flexible prompt format based on tokenizer type
159
- if hasattr(tokenizer, 'chat_template') or 'mistral' in tokenizer.name_or_path.lower():
160
- # Use Mistral format if it's actually Mistral
161
- prompt = f"<s>[INST] {message} [/INST]"
162
- else:
163
- # Use simple format for other tokenizers
164
- prompt = f"User: {message}\nAssistant:"
165
 
166
  # Tokenize input
167
  inputs = tokenizer(
168
- prompt,
169
- return_tensors='pt',
170
  truncation=True,
171
- max_length=400,
172
  padding=True
173
  )
174
- input_ids = inputs['input_ids']
175
- attention_mask = inputs.get('attention_mask', None)
176
 
177
- # Move to model device
178
- device = next(model.parameters()).device
179
- input_ids = input_ids.to(device)
180
- if attention_mask is not None:
181
- attention_mask = attention_mask.to(device)
 
 
182
 
183
  # Generate response
184
  with torch.no_grad():
185
- if torch.cuda.is_available():
186
- torch.cuda.empty_cache()
187
-
188
  outputs = model.generate(
189
- input_ids,
190
- max_new_tokens=200,
191
- temperature=0.7,
 
192
  do_sample=True,
193
- top_p=0.9,
194
- pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id else tokenizer.eos_token_id,
195
  eos_token_id=tokenizer.eos_token_id,
196
- attention_mask=attention_mask,
197
  repetition_penalty=1.1
198
  )
199
 
200
- # Extract and clean response
201
- if outputs.shape[1] > input_ids.shape[1]:
202
- response_ids = outputs[0][input_ids.shape[1]:]
203
- response = tokenizer.decode(response_ids, skip_special_tokens=True)
204
- else:
205
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
206
- response = response.replace(prompt, "").strip()
207
-
208
- # Clean up response
209
- response = response.strip()
210
-
211
- # Remove prompt artifacts
212
- for artifact in ["[/INST]", "[INST]", "Assistant:", "User:", "Human:"]:
213
- if artifact in response:
214
- response = response.split(artifact)[-1].strip()
215
 
216
- # Remove input if it appears in response
217
- if message.lower() in response.lower():
218
- response = response.replace(message, "").strip()
 
 
219
 
220
- # Ensure reasonable length
221
- if len(response) > 800:
222
- response = response[:800] + "..."
 
 
 
223
 
224
- # Fallback if empty
225
- if len(response.strip()) < 3:
226
- response = "I understand. How can I help you?"
227
-
228
- return response
229
 
230
  except Exception as e:
231
- return f"Error: {str(e)}"
 
 
 
 
 
 
232
 
233
- def clear_chat():
234
- return ""
 
 
 
 
 
 
235
 
236
- # Simple custom CSS
237
- css = """
238
- .gradio-container {
239
- max-width: 700px !important;
240
- margin: auto !important;
241
- }
242
- """
243
-
244
- # Create interface
245
- with gr.Blocks(title="3AI - Text Generation", css=css, theme=gr.themes.Default()) as demo:
246
- # Header
247
- gr.Markdown("""
248
- # 🤖 3AI Text Generator
249
- *Simple text-to-text generation with your navidfalah/3ai model*
250
- """)
251
 
252
- # Input
253
- with gr.Row():
254
- input_text = gr.Textbox(
255
- placeholder="Enter your text here...",
256
- label="Input Text",
257
- lines=2,
258
- max_lines=3
 
 
 
 
 
 
 
259
  )
260
-
261
- # Generate button
262
- with gr.Row():
263
- generate_btn = gr.Button("Generate", variant="primary", size="lg")
264
-
265
- # Output
266
- with gr.Row():
267
- output_text = gr.Textbox(
268
- label="Generated Text",
269
- lines=6,
270
- max_lines=10,
271
- interactive=False,
272
- placeholder="Generated text will appear here..."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  )
274
 
275
- # Event handlers
276
- generate_btn.click(
277
- fn=chat_function,
278
- inputs=input_text,
279
- outputs=output_text
280
- )
281
-
282
- input_text.submit(
283
- fn=chat_function,
284
- inputs=input_text,
285
- outputs=output_text
286
- )
287
-
288
- # Footer
289
- gr.Markdown("---\n*navidfalah/3ai • Simple Text Generation*")
290
 
 
291
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  demo.launch()
 
1
+ def test_model():
2
+ """Simple test function to check if model is working."""
3
+ try:
4
+ model, tokenizer = load_model()
5
+ if model and tokenizer:
6
+ test_input = "Test: Rate my satisfaction with work at 5/10"
7
+ inputs = tokenizer(test_input, return_tensors="pt", max_length=50)
8
+ with torch.no_grad():
9
+ outputs = model.generate(**inputs, max_new_tokens=20)
10
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
11
+ print(f"Test successful! Output: {result}")
12
+ return True
13
+ return False
14
+ except Exception as e:
15
+ print(f"Test failed: {e}")
16
+ return False
+ import gradio as gr
17
  import torch
18
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
19
+ from peft import PeftModel
20
  import os
21
+ from typing import Tuple, Optional
 
 
 
22
 
23
+ # Configuration
24
+ class Config:
25
+ MODEL_PATH = "navidfalah/3ai" # Your HF model repo
26
+ BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1" # Mistral base model
27
+ ADAPTER_PATH = "./model" # Local adapter path if needed
28
+ MAX_NEW_TOKENS = 1000 # Reduced for faster response
29
+ TEMPERATURE = 0.7
30
+ TOP_P = 0.9
31
+ MAX_INPUT_LENGTH = 512 # Reduced for faster processing
32
+
33
+ # Global variables for model and tokenizer
34
+ model = None
35
+ tokenizer = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ def test_model():
38
+ """Simple test function to check if model is working."""
 
 
 
 
 
 
 
 
 
 
39
  try:
40
+ model, tokenizer = load_model()
41
+ if model and tokenizer:
42
+ test_input = "Test: Rate my satisfaction with work at 5/10"
43
+ inputs = tokenizer(test_input, return_tensors="pt", max_length=50)
44
+ with torch.no_grad():
45
+ outputs = model.generate(**inputs, max_new_tokens=20)
46
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
47
+ print(f"Test successful! Output: {result}")
48
+ return True
49
+ return False
50
+ except Exception as e:
51
+ print(f"Test failed: {e}")
52
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ def load_model() -> Tuple[Optional[object], Optional[object]]:
55
+ """Load the fine-tuned satisfaction analysis model."""
56
+ global model, tokenizer
 
 
 
 
 
 
 
57
 
58
+ if model is not None and tokenizer is not None:
59
+ return model, tokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
 
 
 
61
  try:
62
+ print("🔄 Loading Mistral model and tokenizer...")
63
+
64
+ # Load tokenizer from base model (Mistral)
65
+ tokenizer = AutoTokenizer.from_pretrained(Config.BASE_MODEL)
66
+ if tokenizer.pad_token is None:
67
+ tokenizer.pad_token = tokenizer.eos_token
68
+ tokenizer.padding_side = "left" # Change to left padding for generation
69
+
70
+ # Quantization config for efficient inference
71
+ bnb_config = BitsAndBytesConfig(
72
+ load_in_4bit=True,
73
+ bnb_4bit_use_double_quant=True,
74
+ bnb_4bit_quant_type="nf4",
75
+ bnb_4bit_compute_dtype=torch.float16
76
+ )
77
+
78
+ # Load base Mistral model
79
  base_model = AutoModelForCausalLM.from_pretrained(
80
+ Config.BASE_MODEL,
81
+ quantization_config=bnb_config,
82
  device_map="auto",
83
+ trust_remote_code=True,
84
+ torch_dtype=torch.float16,
85
  low_cpu_mem_usage=True
86
  )
87
+
88
+ # Try loading adapter from HF repo first
89
+ try:
90
+ model = PeftModel.from_pretrained(
91
+ base_model,
92
+ Config.MODEL_PATH,
93
+ is_trainable=False,
94
+ torch_dtype=torch.float16
95
+ )
96
+ print("✅ Loaded model from Hugging Face repo")
97
+ except Exception as e:
98
+ print(f"Could not load from HF: {e}")
99
+ # Fallback to local adapter if available
100
+ if os.path.exists(Config.ADAPTER_PATH):
101
+ model = PeftModel.from_pretrained(
102
+ base_model,
103
+ Config.ADAPTER_PATH,
104
+ is_trainable=False,
105
+ torch_dtype=torch.float16
106
+ )
107
+ print("✅ Loaded model from local adapter")
108
+ else:
109
+ # If no adapter found, use base model
110
+ model = base_model
111
+ print("⚠️ Using base model without adapter")
112
+
113
+ model.eval()
114
+ print("✅ Mistral-7B model loaded successfully!")
115
+ print(f"Device: {next(model.parameters()).device}")
116
+ return model, tokenizer
117
+
118
+ except Exception as e:
119
+ print(f"❌ Error loading model: {e}")
120
+ import traceback
121
+ traceback.print_exc()
122
+ return None, None
123
 
124
+ def analyze_satisfaction(user_input: str) -> str:
125
+ """Generate satisfaction analysis based on user input text."""
126
+
127
+ if not user_input or not user_input.strip():
128
+ return "⚠️ Please enter some text describing your life situation or what you'd like analyzed."
129
 
130
+ # Show loading message
131
+ yield "🔄 Loading model and analyzing your input... This may take a moment on first run."
132
+
133
+ # Load model if not already loaded
134
+ model, tokenizer = load_model()
135
+
136
+ if model is None or tokenizer is None:
137
+ yield "❌ Error: Could not load the model. Please check the model configuration and try again."
138
+ return
139
 
140
  try:
141
+ yield "🔍 Processing your input..."
142
+
143
+ # Prepare the prompt in Mistral format
144
+ formatted_prompt = f"[INST] {user_input} [/INST]"
 
 
 
145
 
146
  # Tokenize input
147
  inputs = tokenizer(
148
+ formatted_prompt,
149
+ return_tensors="pt",
150
  truncation=True,
151
+ max_length=Config.MAX_INPUT_LENGTH,
152
  padding=True
153
  )
 
 
154
 
155
+ # Move to GPU if available
156
+ device = "cuda" if torch.cuda.is_available() else "cpu"
157
+ if device == "cuda":
158
+ inputs = {k: v.to(device) for k, v in inputs.items()}
159
+ model.to(device)
160
+
161
+ yield "💭 Generating analysis..."
162
 
163
  # Generate response
164
  with torch.no_grad():
 
 
 
165
  outputs = model.generate(
166
+ **inputs,
167
+ max_new_tokens=Config.MAX_NEW_TOKENS,
168
+ temperature=Config.TEMPERATURE,
169
+ top_p=Config.TOP_P,
170
  do_sample=True,
171
+ pad_token_id=tokenizer.eos_token_id,
 
172
  eos_token_id=tokenizer.eos_token_id,
 
173
  repetition_penalty=1.1
174
  )
175
 
176
+ # Decode response
177
+ full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
+ # Extract generated text (remove input prompt)
180
+ if "[/INST]" in full_response:
181
+ generated_text = full_response.split("[/INST]")[-1].strip()
182
+ else:
183
+ generated_text = full_response[len(formatted_prompt):].strip()
184
 
185
+ # Format the output
186
+ formatted_output = "## 📊 Life Satisfaction Analysis\n\n"
187
+ if generated_text:
188
+ formatted_output += generated_text
189
+ else:
190
+ formatted_output += "I apologize, but I couldn't generate a proper analysis. Please try rephrasing your input or provide more details about your life situation."
191
 
192
+ yield formatted_output
 
 
 
 
193
 
194
  except Exception as e:
195
+ error_msg = f"Error during analysis: {str(e)}\n\n"
196
+ error_msg += "**Troubleshooting tips:**\n"
197
+ error_msg += "- Make sure the model is properly uploaded to Hugging Face\n"
198
+ error_msg += "- Check if the Space has enough resources (GPU/CPU)\n"
199
+ error_msg += "- Try with a shorter input text\n"
200
+ error_msg += f"- Current device: {'GPU' if torch.cuda.is_available() else 'CPU'}"
201
+ yield error_msg
202
 
203
+ # Example prompts for users
204
+ EXAMPLE_PROMPTS = [
205
+ "I'm a 29-year-old professional feeling burned out at work. My health is okay but I rarely exercise. Financially stable but not saving much. Great relationship with my partner. What's my life satisfaction score?",
206
+ "Rate my life satisfaction: Work is stressful (3/10), health is good (7/10), finances are tight (4/10), relationships are excellent (9/10). Give me a comprehensive analysis.",
207
+ "Analyze my satisfaction: Career going well, making good money, but no time for friends or hobbies. Always tired and stressed. How can I improve?",
208
+ "I'm happy with my job and relationships but struggling with debt and health issues. Need advice on balancing everything.",
209
+ "Just graduated, starting my career, living paycheck to paycheck, single but happy, very healthy and active. Analyze my life satisfaction."
210
+ ]
211
 
212
+ # Gradio Interface
213
+ def create_interface():
214
+ """Create the Gradio interface."""
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
+ with gr.Blocks(title="Life Satisfaction Analysis", theme=gr.themes.Soft()) as demo:
217
+ gr.Markdown(
218
+ """
219
+ # 🌟 AI Life Satisfaction Analyzer
220
+
221
+ This AI-powered tool analyzes your life satisfaction based on your description of your current situation.
222
+ Simply describe your life circumstances, challenges, and satisfaction levels across different areas.
223
+
224
+ **The AI will analyze:**
225
+ - Overall life satisfaction score
226
+ - Balance across life domains (work, health, finances, relationships)
227
+ - Personalized recommendations for improvement
228
+ - Action plans and strategies
229
+ """
230
  )
231
+
232
+ with gr.Row():
233
+ with gr.Column():
234
+ # Input section
235
+ input_text = gr.Textbox(
236
+ label="📝 Describe Your Current Life Situation",
237
+ placeholder="Tell me about your work, health, finances, relationships, and any other aspects of your life you'd like analyzed. You can include satisfaction ratings (1-10) or just describe how you feel about each area.",
238
+ lines=8,
239
+ max_lines=15
240
+ )
241
+
242
+ with gr.Row():
243
+ analyze_btn = gr.Button("🔍 Analyze My Life Satisfaction", variant="primary", scale=2)
244
+ clear_btn = gr.Button("🗑️ Clear", scale=1)
245
+
246
+ # Examples section
247
+ gr.Markdown("### 💡 Example Inputs")
248
+ example_dropdown = gr.Dropdown(
249
+ choices=EXAMPLE_PROMPTS,
250
+ label="Select an example to try:",
251
+ interactive=True
252
+ )
253
+
254
+ with gr.Row():
255
+ with gr.Column():
256
+ # Output section
257
+ output = gr.Markdown(label="Analysis Results")
258
+
259
+ # Event handlers
260
+ analyze_btn.click(
261
+ fn=analyze_satisfaction,
262
+ inputs=input_text,
263
+ outputs=output
264
+ )
265
+
266
+ clear_btn.click(
267
+ fn=lambda: ("", ""),
268
+ inputs=[],
269
+ outputs=[input_text, output]
270
+ )
271
+
272
+ example_dropdown.change(
273
+ fn=lambda x: x,
274
+ inputs=example_dropdown,
275
+ outputs=input_text
276
+ )
277
+
278
+ # Tips section
279
+ with gr.Accordion("💡 Tips for Best Results", open=False):
280
+ gr.Markdown(
281
+ """
282
+ **How to get the most accurate analysis:**
283
+
284
+ 1. **Be specific** about your situation in each life area
285
+ 2. **Include ratings** (1-10) if you want quantified analysis
286
+ 3. **Mention your age** and life stage for context
287
+ 4. **Describe challenges** you're facing
288
+ 5. **Share your goals** or what you'd like to improve
289
+
290
+ **Example format:**
291
+ - Work: [Your situation and satisfaction level]
292
+ - Health: [Physical and mental wellness status]
293
+ - Finances: [Financial situation and concerns]
294
+ - Relationships: [Social and romantic relationships]
295
+ - Personal: [Hobbies, growth, fulfillment]
296
+ """
297
+ )
298
+
299
+ # Footer
300
+ gr.Markdown(
301
+ """
302
+ ---
303
+ 💡 **Disclaimer:** This AI tool provides general insights based on the information you provide.
304
+ For professional advice, please consult qualified experts in relevant fields.
305
+
306
+ 🔒 **Privacy:** Your input is processed in real-time and not stored.
307
+ """
308
  )
309
 
310
+ return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
+ # Launch the app
313
  if __name__ == "__main__":
314
+ # Check environment
315
+ print("🚀 Starting Life Satisfaction Analysis Tool...")
316
+ print(f"PyTorch version: {torch.__version__}")
317
+ print(f"CUDA available: {torch.cuda.is_available()}")
318
+ if torch.cuda.is_available():
319
+ print(f"CUDA device: {torch.cuda.get_device_name(0)}")
320
+
321
+ # Try to load model on startup (but don't fail if it doesn't work)
322
+ try:
323
+ load_model()
324
+ except Exception as e:
325
+ print(f"Note: Model will be loaded on first use. Error: {e}")
326
+
327
+ # Create and launch interface
328
+ demo = create_interface()
329
+ demo.queue() # Enable queue for streaming
330
  demo.launch()