import gradio as gr
import torch
#from transformers import AutoTokenizer, Mistral3ForConditionalGeneration
import re
import os
from typing import List, Tuple
import spaces


# Model configuration
# Hugging Face Hub repository id of the model loaded by
# SinaReasonMedicalChat.load_model().
MODEL_NAME = "yasserrmd/SinaReason-Magistral-2509"
# Evaluated once at import time. NOTE(review): nothing in the visible code reads
# DEVICE — load_model() passes device_map="cuda" directly — confirm whether this
# constant is still needed.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Medical system prompt as recommended by the model card.
# Sent as the "system" message of every conversation. It asks the model to put
# its reasoning inside <think> ... </think> and the final summary after it,
# which is the layout extract_thinking_and_response() parses.
MEDICAL_SYSTEM_PROMPT = """
You are SinaReason, a medical reasoning assistant for educational and clinical support.
Your goal is to carefully reason through clinical problems for a professional audience (clinicians, students).
**Never provide medical advice directly to a patient.**
First, draft your detailed thought process (inner monologue) inside <think> ... </think>.
- Use this section to work through symptoms, differential diagnoses, and investigation plans.
- Be explicit and thorough in your reasoning.
After closing </think>, provide a clear, self-contained medical summary appropriate for a clinical professional.
- Summarize the most likely diagnosis and your reasoning.
- Suggest next steps for investigation or management.
"""


class SinaReasonMedicalChat:
    """Chat backend that wraps the SinaReason model for the Gradio UI.

    Construction is cheap: the model and tokenizer start as ``None`` and are
    loaded by ``medical_chat`` the first time they are needed, then reused for
    as long as this instance keeps them.
    """

    # Header that introduces the clinical summary in a rendered reply. It is
    # also the marker used to cut the reasoning section out of earlier
    # assistant turns before they are replayed to the model.
    _SUMMARY_HEADER = "🩺 **Clinical Summary**"

    # A complete <think>...</think> block, and a bare opening tag (used to
    # detect reasoning that was cut off before the closing tag was generated).
    _THINK_BLOCK = re.compile(r"<think>(.*?)</think>", re.DOTALL | re.IGNORECASE)
    _THINK_OPEN = re.compile(r"<think>", re.IGNORECASE)

    def __init__(self):
        self.tokenizer = None
        self.model = None
        # The PixtralProcessor requires an image argument, even if it's None.
        # This app is text-only, so the value passed as `images=` is always None.
        self.dummy_image = None

    def load_model(self):
        """Load the SinaReason medical model and tokenizer using Unsloth.

        Populates ``self.model`` and ``self.tokenizer``. Every call performs a
        fresh load; callers that only want the model to be available should
        check ``self.model`` first (as ``medical_chat`` does).

        Raises:
            Exception: any error raised while importing Unsloth or loading the
                model is logged and re-raised unchanged.
        """
        try:
            # Imported lazily so that importing this module does not require
            # Unsloth to be importable at that point.
            from unsloth import FastLanguageModel
            print(f"Loading medical model with Unsloth: {MODEL_NAME}")
            print("cuda" if torch.cuda.is_available() else "cpu")

            # Use FastLanguageModel from Unsloth to load the model and tokenizer
            self.model, self.tokenizer = FastLanguageModel.from_pretrained(
                model_name=MODEL_NAME,
                dtype=torch.bfloat16,
                load_in_4bit=True,  # Or False if you have enough VRAM for 16-bit
                device_map="cuda",
            )

            print("SinaReason medical model loaded successfully with Unsloth!")

        except Exception as e:
            print(f"Error loading model with Unsloth: {e}")
            # Bare raise keeps the original traceback intact.
            raise

    def extract_thinking_and_response(self, text: str) -> Tuple[str, str]:
        """Split generated text into (thinking, response).

        Args:
            text: Decoded model output.

        Returns:
            A ``(thinking, response)`` tuple:

            * If a complete ``<think>...</think>`` block exists, ``thinking``
              is the stripped content of the first block and ``response`` is
              ``text`` with every complete block removed, stripped.
            * If only an opening ``<think>`` exists (for example the output hit
              the token limit mid-reasoning), everything after the tag is
              ``thinking`` and everything before it is ``response``.
            * Otherwise ``thinking`` is ``""`` and ``response`` is ``text``
              unchanged.
        """
        closed = self._THINK_BLOCK.search(text)
        if closed:
            thinking = closed.group(1).strip()
            response = self._THINK_BLOCK.sub("", text).strip()
            return thinking, response

        opened = self._THINK_OPEN.search(text)
        if opened:
            # Unterminated reasoning: keep it out of the clinical summary
            # instead of showing the raw tag and partial monologue there.
            return text[opened.end():].strip(), text[:opened.start()].strip()

        return "", text

    @spaces.GPU(duration=120)
    def medical_chat(
        self,
        message: str,
        history: List[List[str]],
        max_tokens: int = 1024,
        temperature: float = 0.7,
        top_p: float = 0.95,
    ) -> Tuple[str, List[List[str]]]:
        """Generate medical reasoning responses using the Unsloth model.

        Args:
            message: The new user message.
            history: Prior ``[user, assistant]`` pairs as shown in the chatbot.
            max_tokens: Upper bound on newly generated tokens.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling probability mass.

        Returns:
            ``("", new_history)`` — an empty string to clear the input box and
            the history extended with the new exchange. A blank ``message``
            returns ``("", history)`` without touching the model.
        """
        # Reject blank input first so it never triggers a model load.
        if not message or not message.strip():
            return "", history
        history = history or []

        # Load once and reuse; reloading on every request would repeat the
        # full from_pretrained() cost for each message.
        if self.model is None or self.tokenizer is None:
            self.load_model()
        # No need for model.to(DEVICE), Unsloth's device_map handles it.
        self.model.eval()

        # Apply the chat template with the medical system prompt
        messages = [{"role": "system", "content": MEDICAL_SYSTEM_PROMPT}]
        for user_msg, assistant_msg in history:
            # Replay only the clinical summary of earlier turns, not the
            # rendered reasoning section that precedes the header.
            raw_assistant_msg = assistant_msg.split(self._SUMMARY_HEADER)[-1].strip()
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": raw_assistant_msg})
        messages.append({"role": "user", "content": message})

        # Format the prompt using the chat template
        formatted_prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Tokenize the input, correctly passing images=None
        inputs = self.tokenizer(
            text=formatted_prompt,
            images=self.dummy_image,
            return_tensors="pt"
        ).to(self.model.device)

        # Generation parameters
        generation_kwargs = {
            **inputs,
            "images": self.dummy_image,  # This MUST be passed to model.generate
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": True,
            "pad_token_id": self.tokenizer.eos_token_id,
        }

        # Generate the full response (batch of one, so take the first row)
        output = self.model.generate(**generation_kwargs)[0]

        # Decode only the newly generated tokens
        prompt_length = inputs.input_ids.shape[1]
        full_response = self.tokenizer.decode(output[prompt_length:], skip_special_tokens=True)

        # Extract thinking and clinical summary
        thinking, response = self.extract_thinking_and_response(full_response)

        # Format the final display. The pieces are joined explicitly (rather
        # than written as indented triple-quoted text) so that no source
        # indentation leaks into the Markdown, and blank lines separate the
        # HTML block, the rule and the summary so each is parsed on its own.
        sections = []
        if thinking:
            sections.append(
                "🧠 **Medical Reasoning Process**\n\n"
                "<details>\n"
                "<summary>🔍 Click to view detailed thinking process</summary>\n\n"
                f"*{thinking}*\n\n"
                "</details>\n\n"
                "---\n\n"
            )
        sections.append(f"{self._SUMMARY_HEADER}\n\n{response}")
        final_display = "".join(sections)

        new_history = history + [[message, final_display]]
        return "", new_history


# Create the shared chat backend used by respond().
# Construction does not load any weights (the load_model() call in __init__ is
# commented out); loading happens inside medical_chat().
medical_chat_model = SinaReasonMedicalChat()

def respond(message, history, max_tokens, temperature, top_p):
    """Gradio event handler: delegate one chat turn to the shared model.

    The arguments arrive in the order of the handler's ``inputs`` list and are
    forwarded positionally, unchanged. The return value of ``medical_chat`` is
    passed straight back to Gradio.
    """
    call_args = (message, history, max_tokens, temperature, top_p)
    return medical_chat_model.medical_chat(*call_args)

# Custom CSS for medical interface, passed to gr.Blocks(css=...).
# - .medical-chatbot is attached to the Chatbot through elem_classes.
# - .warning-box and .footer-text are used by the raw gr.HTML snippets.
# - NOTE(review): .thinking-section and .clinical-response are not referenced
#   by any component in the visible code — confirm whether they are still needed.
css = """
.medical-chatbot {
    min-height: 700px;
    border: 2px solid #e3f2fd;
    border-radius: 10px;
}
.thinking-section {
    background: linear-gradient(135deg, #f8f9ff 0%, #e8f4f8 100%);
    border-left: 4px solid #2196f3;
    padding: 15px;
    margin: 10px 0;
    border-radius: 8px;
    font-family: 'Monaco', monospace;
    font-size: 0.9em;
}
.clinical-response {
    background: linear-gradient(135deg, #fff8f0 0%, #fef7ed 100%);
    border-left: 4px solid #ff9800;
    padding: 15px;
    margin: 10px 0;
    border-radius: 8px;
}
.warning-box {
    background: #fff3cd;
    border: 1px solid #ffeaa7;
    border-radius: 8px;
    padding: 15px;
    margin: 15px 0;
    color: #856404;
}
.footer-text {
    text-align: center;
    color: #666;
    font-size: 0.9em;
    margin-top: 20px;
}
"""

# Create medical Gradio interface.
# Components are laid out in the order they are created inside the context
# managers below: title, disclaimer, then a two-column row (chat on the left,
# generation parameters on the right), followed by examples and a footer.
with gr.Blocks(css=css, title="SinaReason Medical Reasoning", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🩺 SinaReason Medical Reasoning Assistant
    
    **Advanced Clinical Reasoning Model** - Inspired by Ibn Sina (Avicenna)
    
    This model provides transparent chain-of-thought medical reasoning for **educational and clinical support purposes**.
    """)
    
    # Medical disclaimer (styled by the .warning-box rule in `css`)
    with gr.Row():
        gr.HTML("""
        <div class="warning-box">
            <h4>⚠️ Important Medical Disclaimer</h4>
            <p><strong>This is a research and educational tool for medical professionals, researchers, and students.</strong></p>
            <ul>
                <li>🚫 <strong>NOT a medical device</strong> - Not for patient diagnosis or treatment</li>
                <li>👨‍⚕️ <strong>Professional use only</strong> - Intended for clinicians and medical students</li>
                <li>🔍 <strong>Verify all outputs</strong> - Always confirm with qualified medical professionals</li>
                <li>📚 <strong>Educational purpose</strong> - For learning clinical reasoning patterns</li>
            </ul>
        </div>
        """)
    
    with gr.Row():
        # Left column: conversation, input box and action buttons
        with gr.Column(scale=4):
            # NOTE(review): the second avatar is an emoji string; Gradio
            # documents avatar_images as file paths or URLs — confirm this
            # renders as intended on the deployed Gradio version.
            chatbot = gr.Chatbot(
                height=700,
                show_copy_button=True,
                bubble_full_width=False,
                elem_classes=["medical-chatbot"],
                avatar_images=(None, "🩺")
            )
            
            msg = gr.Textbox(
                placeholder="Describe a clinical scenario or case for medical reasoning analysis...",
                lines=3,
                max_lines=8,
                show_label=False,
                container=False
            )
            
            with gr.Row():
                submit_btn = gr.Button("🔍 Analyze Case", variant="primary", size="sm")
                clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm")
                retry_btn = gr.Button("🔄 Retry", variant="secondary", size="sm")
        
        # Right column: sampling controls forwarded to respond()
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("### ⚙️ Model Parameters")
            
            max_tokens = gr.Slider(
                minimum=256,
                maximum=2048,
                value=1024,
                step=64,
                label="Max Tokens",
                info="Maximum response length"
            )
            
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.05,
                label="Temperature",
                info="Reasoning creativity (0.7 recommended)"
            )
            
            top_p = gr.Slider(
                minimum=0.8,
                maximum=1.0,
                value=0.95,
                step=0.01,
                label="Top-p",
                info="Focus precision (0.95 recommended)"
            )
            
            gr.Markdown("""
            ### 🎯 Usage Guidelines:
            
            **Best for:**
            - Clinical case analysis
            - Differential diagnosis reasoning  
            - Medical education scenarios
            - Professional consultation support
            
            **Features:**
            - Transparent `<think>` process
            - Step-by-step clinical reasoning
            - Evidence-based conclusions
            - Professional medical language
            """)
    
    # Event handlers
    def clear_chat():
        """Return an empty history and an empty textbox value."""
        return [], ""
    
    def retry_last(history):
        """Remove the last exchange and put its user message back in the textbox.

        This handler does not call respond(); as wired below, the restored
        message has to be submitted again to get a new answer.
        """
        if history:
            last_user_msg = history[-1][0]
            return history[:-1], last_user_msg
        return history, ""
    
    # Button events
    # The button click and pressing Enter in the textbox run the same handler
    # with the same inputs; respond() returns ("", new_history), which clears
    # the textbox and refreshes the chatbot.
    submit_btn.click(
        respond,
        inputs=[msg, chatbot, max_tokens, temperature, top_p],
        outputs=[msg, chatbot]
    )
    
    msg.submit(
        respond,
        inputs=[msg, chatbot, max_tokens, temperature, top_p],
        outputs=[msg, chatbot]
    )
    
    clear_btn.click(clear_chat, outputs=[chatbot, msg])
    retry_btn.click(retry_last, inputs=[chatbot], outputs=[chatbot, msg])
    
    # Medical case examples (selecting one fills the textbox `msg`)
    gr.Examples(
        examples=[
            "Patient: 72-year-old with history of hypertension presents with confusion, right-sided weakness, and slurred speech. What is the likely cause and immediate steps?",
            "Patient: 45-year-old with sudden onset severe headache described as 'the worst ever'. What should be ruled out and how?",
            "Patient: 60-year-old with long-standing diabetes has numbness and tingling in both feet. What is the most likely diagnosis and first-line management?",
            "Patient: 30-year-old with polyuria, polydipsia, and weight loss. What investigation confirms the diagnosis?",
            "Patient: 55-year-old with progressive shortness of breath, orthopnea, and ankle swelling. What condition and investigation are likely?",
            "Patient: 25-year-old presents with high fever, sore throat, swollen neck, and drooling. What life-threatening condition must be excluded?"
        ],
        inputs=[msg],
        label="📋 Clinical Case Examples (Try these scenarios):"
    )
    
    # Footer (styled by the .footer-text rule in `css`)
    gr.HTML("""
    <div class="footer-text">
        <p><strong>Model:</strong> yasserrmd/SinaReason-Magistral-2509 (24B parameters)</p>
        <p><strong>Base:</strong> Magistral-Small-2509 | <strong>Inspired by:</strong> Ibn Sina (Avicenna)</p>
        <p><strong>Dataset:</strong> FreedomIntelligence/medical-o1-reasoning-SFT</p>
        <p>🚀 <strong>Optimized for:</strong> Hugging Face Zero GPU Spaces</p>
    </div>
    """)


# Launch configuration for HF Spaces.
# Only runs when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    launch_options = {"show_error": True}
    demo.launch(**launch_options)