Update app.py
app.py
CHANGED
@@ -1,307 +1,358 @@
import gradio as gr
import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from
import os

-class Config:
-    MODEL_PATH = "navidfalah/3ai"  # Your HF model repo
-    BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.1"  # Mistral base model
-    ADAPTER_PATH = "./model"  # Local adapter path if needed
-    MAX_NEW_TOKENS = 2000
-    TEMPERATURE = 0.7
-    TOP_P = 0.9
-
-# Global variables for model and tokenizer
-model = None
-tokenizer = None

    try:
        )
        try:
-        model
            )
-        print("
-    except:
                )
-        print("

-def create_prompt(work: int, health: int, financial: int, relationship: int, context: str) -> str:
-    """Create the analysis prompt with user inputs."""
-    prompt = f"""As a holistic life satisfaction analyst, please provide a comprehensive analysis of this person's overall life satisfaction across all major life domains.
-
-**Complete Life Satisfaction Assessment:**

-5. **Holistic Improvement Strategy:**
-   - Critical first steps (next 30 days)
-   - Balanced development plan (3-6 months)
-   - Long-term life optimization (6-12 months)
-6. **Life Balance Recommendations:** How to create synergy between all life areas
-7. **Resilience Building:** Strategies to strengthen overall life satisfaction foundation
-8. **Success Metrics:** How to track progress across all domains
-
-**Context:** {context}"""

-def
-    financial: int,
-    relationship: int,
-    context: str
-) -> str:
-    """Generate satisfaction analysis based on user inputs."""
-
-    # Load model if not already loaded
-    model, tokenizer = load_model()

    try:

        # Tokenize input

        # Generate response

        # Decode response

-        summary += f"- Highest Domain: {get_highest_domain(work, health, financial, relationship)}\n"
-        summary += f"- Lowest Domain: {get_lowest_domain(work, health, financial, relationship)}\n\n"
-        summary += "---\n\n"

    except Exception as e:

-def
-    scores = {
-        "Work": work,
-        "Health & Wellness": health,
-        "Financial": financial,
-        "Relationships": relationship
-    }
-    return max(scores, key=scores.get) + f" ({scores[max(scores, key=scores.get)]}/10)"

-    }
-    return min(scores, key=scores.get) + f" ({scores[min(scores, key=scores.get)]}/10)"

-    3. Click "Analyze" to receive personalized insights and recommendations
-    """
-    )
-
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### 📊 Rate Your Satisfaction")
-
-            work_score = gr.Slider(
-                minimum=1,
-                maximum=10,
-                value=5,
-                step=1,
-                label="💼 Work Satisfaction",
-                info="How satisfied are you with your career/work life?"
-            )
-
-            health_score = gr.Slider(
-                minimum=1,
-                maximum=10,
-                value=5,
-                step=1,
-                label="🏃 Health & Wellness",
-                info="How satisfied are you with your physical and mental health?"
-            )
-
-            financial_score = gr.Slider(
-                minimum=1,
-                maximum=10,
-                value=5,
-                step=1,
-                label="💰 Financial Satisfaction",
-                info="How satisfied are you with your financial situation?"
-            )
-
-            relationship_score = gr.Slider(
-                minimum=1,
-                maximum=10,
-                value=5,
-                step=1,
-                label="❤️ Relationships",
-                info="How satisfied are you with your personal relationships?"
-            )
-
-            context_input = gr.Textbox(
-                label="📝 Context (Optional)",
-                placeholder="Share any relevant context about your situation (age, goals, challenges, etc.)",
-                lines=3
-            )
-
-            analyze_btn = gr.Button("🔍 Analyze My Life Satisfaction", variant="primary")
-
-        with gr.Column(scale=2):
-            gr.Markdown("### 📋 Your Personalized Analysis")
-            output = gr.Markdown()
-
-    # Example section
-    with gr.Row():
-        gr.Examples(
-            examples=[
-                [3, 5, 7, 8, "29-year-old professional seeking work-life balance"],
-                [7, 4, 6, 5, "45-year-old focusing on health improvement"],
-                [5, 8, 4, 9, "Recent graduate starting career journey"],
-            ],
-            inputs=[work_score, health_score, financial_score, relationship_score, context_input],
-            label="Example Scenarios"
            )

-# Launch the app
if __name__ == "__main__":
-    # Load model on startup
-    print("🚀 Starting Life Satisfaction Analysis Tool...")
-    load_model()
-
-    # Create and launch interface
-    demo = create_interface()
    demo.launch()
import gradio as gr
import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from huggingface_hub import login
import os
+import subprocess
+import sys

+print("Starting 3AI application...")

+# Install required dependencies
+print("Installing required dependencies...")
+try:
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "sentencepiece", "protobuf", "peft", "--quiet"])
+    print("Dependencies installed successfully!")
+except Exception as e:
+    print(f"Warning: Could not install dependencies: {e}")
+
+# Import PEFT after installation
+try:
+    from peft import PeftModel, PeftConfig
+    print("PEFT imported successfully!")
+except ImportError as e:
+    print(f"Could not import PEFT: {e}")
+    print("Trying to install PEFT again...")
    try:
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "peft", "--force-reinstall"])
+        from peft import PeftModel, PeftConfig
+        print("PEFT installed and imported successfully!")
+    except Exception as e2:
+        print(f"Failed to install PEFT: {e2}")
+        print("Continuing without PEFT - will try alternative approach")
+        PeftModel = None
+        PeftConfig = None
+
+# Login using the secret token
+token = os.getenv("HF_TOKEN")
+if token:
+    login(token=token)
+    print("Successfully logged in to Hugging Face!")
+
+# Use your own Hugging Face model
+original_mistral_model = "navidfalah/3ai"  # Your model on Hugging Face
+adapter_path = "./model"  # Your local LoRA adapter directory (if available)
+
+print(f"Loading original Mistral tokenizer from {original_mistral_model}...")
+try:
+    # First try: Load with slow tokenizer from your model
+    tokenizer = AutoTokenizer.from_pretrained(
+        original_mistral_model,
+        use_fast=False,  # Use slow tokenizer to avoid issues
+        force_download=True,  # Force fresh download
+        resume_download=False
+    )
+    print("Your model tokenizer loaded successfully!")
+except Exception as e:
+    print(f"Error loading tokenizer from your model: {e}")
+    try:
+        # Second try: Use original Mistral tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(
+            "mistralai/Mistral-7B-Instruct-v0.1",
+            use_fast=False
        )
+        print("Original Mistral tokenizer loaded successfully!")
+    except Exception as e2:
+        print(f"Error with original Mistral: {e2}")
        try:
+            # Third try: Use different Mistral model version
+            print("Trying Mistral-7B-Instruct-v0.2...")
+            tokenizer = AutoTokenizer.from_pretrained(
+                "mistralai/Mistral-7B-Instruct-v0.2",
+                use_fast=False
            )
+            print("Mistral v0.2 tokenizer loaded successfully!")
+        except Exception as e3:
+            print(f"Error with Mistral v0.2: {e3}")
+            try:
+                # Fourth try: Use compatible tokenizer
+                print("Trying compatible tokenizer...")
+                tokenizer = AutoTokenizer.from_pretrained(
+                    "microsoft/DialoGPT-medium",
+                    use_fast=False
                )
+                print("Compatible tokenizer loaded successfully!")
+            except Exception as e4:
+                print(f"Error with compatible tokenizer: {e4}")
+                try:
+                    # Fifth try: Use GPT-2 as fallback
+                    print("Using GPT-2 as fallback...")
+                    tokenizer = AutoTokenizer.from_pretrained("gpt2")
+                    print("GPT-2 tokenizer loaded successfully!")
+                except Exception as e5:
+                    print(f"Cannot load any tokenizer: {e5}")
+                    print("Exiting - cannot proceed without tokenizer")
+                    exit(1)

+# Ensure tokenizer has proper tokens
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token

+print(f"Loading your fine-tuned Mistral model from {original_mistral_model}...")
+try:
+    # Load your fine-tuned model weights
+    model = AutoModelForCausalLM.from_pretrained(
+        original_mistral_model,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True,
+        low_cpu_mem_usage=True,
+        local_files_only=True
+    )
+    print("Fine-tuned Mistral model loaded successfully!")

+except Exception as e:
+    print(f"Error loading fine-tuned model from {original_mistral_model}: {e}")
+    print("Trying without local_files_only...")
+    try:
+        model = AutoModelForCausalLM.from_pretrained(
+            original_mistral_model,
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True,
+            low_cpu_mem_usage=True
+        )
+        print("Fine-tuned Mistral model loaded successfully!")
+    except Exception as e2:
+        print(f"Cannot load fine-tuned model: {e2}")
+        print("Exiting - cannot proceed without your fine-tuned model")
+        exit(1)

+def chat_function(message):
+    if not message or not message.strip():
+        return "Please enter a message to get started!"

+    # Limit input length
+    if len(message) > 300:
+        return "Message too long! Please keep it under 300 characters."

    try:
+        # Use flexible prompt format based on tokenizer type
+        if hasattr(tokenizer, 'chat_template') or 'mistral' in tokenizer.name_or_path.lower():
+            # Use Mistral format if it's actually Mistral
+            prompt = f"<s>[INST] {message.strip()} [/INST]"
+        else:
+            # Use simple format for other tokenizers
+            prompt = f"Human: {message.strip()}\nAssistant:"

        # Tokenize input
+        try:
+            inputs = tokenizer(
+                prompt,
+                return_tensors='pt',
+                truncation=True,
+                max_length=512,
+                padding=True
+            )
+            input_ids = inputs['input_ids']
+            attention_mask = inputs.get('attention_mask', None)
+
+        except Exception as e:
+            print(f"Tokenization error: {e}")
+            return f"Error processing your message: {str(e)}"
+
+        # Validate input
+        if input_ids.shape[-1] == 0:
+            return "Error: Empty input after encoding"

+        print(f"Input shape: {input_ids.shape}")
+
+        # Move to model device
+        try:
+            device = next(model.parameters()).device
+            input_ids = input_ids.to(device)
+            if attention_mask is not None:
+                attention_mask = attention_mask.to(device)
+        except Exception as e:
+            print(f"Device move error: {e}")

        # Generate response
+        try:
+            with torch.no_grad():
+                # Clear cache to prevent memory issues
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+
+                # Conservative generation parameters
+                generation_kwargs = {
+                    'input_ids': input_ids,
+                    'max_new_tokens': 150,
+                    'temperature': 0.7,
+                    'do_sample': True,
+                    'pad_token_id': tokenizer.pad_token_id,
+                    'eos_token_id': tokenizer.eos_token_id,
+                    'num_return_sequences': 1,
+                    'repetition_penalty': 1.1,
+                    'top_p': 0.9,
+                    'use_cache': True,
+                    'num_beams': 1,
+                }
+
+                # Add attention mask if available
+                if attention_mask is not None:
+                    generation_kwargs['attention_mask'] = attention_mask
+
+                print(f"Generating with input_ids shape: {input_ids.shape}")
+                outputs = model.generate(**generation_kwargs)
+                print(f"Generated output shape: {outputs.shape}")
+
+        except Exception as e:
+            print(f"Generation error: {e}")
+            # Try with minimal settings
+            try:
+                print("Trying with minimal settings...")
+                outputs = model.generate(
+                    input_ids,
+                    max_new_tokens=80,
+                    do_sample=False,  # Greedy decoding
+                    pad_token_id=tokenizer.pad_token_id,
+                    eos_token_id=tokenizer.eos_token_id,
+                )
+                print(f"Minimal generation output shape: {outputs.shape}")
+            except Exception as e2:
+                print(f"Minimal generation also failed: {e2}")
+                return f"Error generating response: {str(e)}"

        # Decode response
+        try:
+            # Extract only the new tokens (response part)
+            if outputs.shape[1] > input_ids.shape[1]:
+                response_ids = outputs[0][input_ids.shape[1]:]
+                response = tokenizer.decode(response_ids, skip_special_tokens=True)
+            else:
+                # Fallback: decode full output and remove prompt
+                full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+                response = full_response.replace(prompt.replace("<s>", "").replace("</s>", ""), "").strip()
+
+        except Exception as e:
+            print(f"Decoding error: {e}")
+            try:
+                # Last resort: decode full output
+                full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+                response = full_response
+            except:
+                return f"Error decoding response: {str(e)}"

+        # Clean up the response based on tokenizer type
+        response = response.strip()

+        # Remove prompt artifacts based on what we used
+        if "[/INST]" in response:
+            response = response.split("[/INST]")[-1].strip()
+        if "[INST]" in response:
+            response = response.split("[INST]")[0].strip()
+        if "Assistant:" in response:
+            response = response.split("Assistant:")[-1].strip()
+        if "Human:" in response:
+            response = response.split("Human:")[0].strip()

+        # Remove input message if it appears in response
+        if message.strip() in response:
+            response = response.replace(message.strip(), "").strip()

+        # Limit response length
+        if len(response) > 1000:
+            response = response[:1000] + "..."
+
+        # Ensure we have a meaningful response
+        if len(response.strip()) < 5:
+            response = "I understand your message. How can I help you with that?"
+
+        return response

    except Exception as e:
+        print(f"Unexpected error: {e}")
+        return f"Sorry, I encountered an unexpected error: {str(e)}"

+def clear_chat():
+    return "", ""

+# Simple custom CSS
+css = """
+.gradio-container {
+    max-width: 700px !important;
+    margin: auto !important;
+}
+"""

+# Create interface
+with gr.Blocks(title="3AI Chat Bot - Fine-tuned Mistral", css=css, theme=gr.themes.Default()) as demo:
+    # Header
+    gr.Markdown("""
+    # 🤖 3AI Chat Bot
+    *Powered by your fine-tuned Mistral-7B-Instruct model*

+    **Using your navidfalah/3ai model**
+    """)
+
+    # Main chat area
+    with gr.Row():
+        with gr.Column():
+            message_input = gr.Textbox(
+                placeholder="Type your message here... (max 300 characters)",
+                label="Your Message",
+                lines=3,
+                max_lines=4
            )
+
+            with gr.Row():
+                submit_btn = gr.Button("Send", variant="primary", scale=3)
+                clear_btn = gr.Button("Clear", variant="secondary", scale=1)
+
+            # Response area
+            response_output = gr.Textbox(
+                label="AI Response",
+                lines=15,
+                max_lines=25,
+                interactive=False,
+                placeholder="Your fine-tuned model responses will appear here..."
+            )
+
+            # Character counter
+            char_count = gr.HTML("<div style='text-align: right; color: #666; font-size: 12px;'>0/300 characters</div>")
+
+    # Event handlers
+    submit_btn.click(
+        fn=chat_function,
+        inputs=message_input,
+        outputs=response_output
+    )

+    message_input.submit(
+        fn=chat_function,
+        inputs=message_input,
+        outputs=response_output
+    )
+
+    clear_btn.click(
+        fn=clear_chat,
+        outputs=[message_input, response_output]
+    )
+
+    # Update character counter
+    def update_char_count(text):
+        count = len(text) if text else 0
+        color = "#e74c3c" if count > 300 else "#666"
+        return f"<div style='text-align: right; color: {color}; font-size: 12px;'>{count}/300 characters</div>"
+
+    message_input.change(
+        fn=update_char_count,
+        inputs=message_input,
+        outputs=char_count
+    )
+
+    # Footer
+    gr.Markdown("---\n*Built with your navidfalah/3ai model • Gradio + Transformers*")

if __name__ == "__main__":
    demo.launch()
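
The new version installs and imports PEFT and defines adapter_path = "./model", but never applies the adapter; navidfalah/3ai is loaded directly as a full causal-LM checkpoint. If ./model actually holds a LoRA adapter rather than merged weights, a minimal sketch of attaching it on top of the base model (assuming the base is mistralai/Mistral-7B-Instruct-v0.1 and the adapter was saved with peft) would be:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Sketch only: assumes ./model contains a PEFT/LoRA adapter trained on top of
# mistralai/Mistral-7B-Instruct-v0.1; not part of the committed app.py above.
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    torch_dtype=torch.float16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, "./model")  # attach the adapter
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", use_fast=False)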
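
The runtime pip install of sentencepiece, protobuf and peft could also be avoided by pinning the packages in the Space's requirements.txt, which Hugging Face Spaces installs at build time. A minimal sketch (package list assumed from the imports above, versions left unpinned):

gradio
torch
transformers
huggingface_hub
sentencepiece
protobuf
peft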