"""Gradio AI coding assistant: streams LLM answers, with optional voice input.

Two tabs: a text LLM chat and a (stubbed) multimodal chat. Responses stream
token-by-token from the Hugging Face Inference API.
"""

import os
import threading
from datetime import datetime, timedelta

import gradio as gr
import speech_recognition as sr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

load_dotenv()

system_prompt = """
You are an AI coding assistant designed to solve coding problems and provide code snippets based on the user's query. When given a query, follow these guidelines.
1. Return only the necessary and helpful code.
2. Include any related details that enhance understanding or usability of the code.
3. Ensure the code is clean, efficient, and follows best practices.
4. Add comments to explain complex or non-obvious parts of the code.
5. If there are multiple possible solutions, provide the most optimal one first.
"""

# Per-IP request timestamps, guarded by a lock because Gradio may serve
# concurrent requests from multiple threads.
ip_requests = {}
ip_requests_lock = threading.Lock()


def allow_ip(request: gr.Request, show_error: bool = True) -> bool:
    """Rate-limit by client IP: allow at most 15 requests per rolling 24 h.

    Raises gr.Error when the limit is exceeded; returns True otherwise.
    NOTE(review): this helper is currently not attached to any Gradio event
    in this file — wire it up (e.g. via `demo.load` or as a chained event)
    for the rate limit to take effect.
    """
    ip = request.headers.get("X-Forwarded-For")
    now = datetime.now()
    window = timedelta(hours=24)
    with ip_requests_lock:
        if ip in ip_requests:
            # Drop timestamps that have aged out of the 24 h window.
            ip_requests[ip] = [t for t in ip_requests[ip] if now - t < window]
        if len(ip_requests.get(ip, [])) >= 15:
            raise gr.Error(
                "Rate limit exceeded. Please try again tomorrow or use your Hugging Face Pro token.",
                visible=show_error,
            )
        ip_requests.setdefault(ip, []).append(now)
        print("ip_requests", ip_requests)
    return True


def inference(prompt, hf_token, model, model_name, max_new_tokens):
    """Stream a chat completion, yielding the CUMULATIVE text so far.

    Each yielded value is the full response accumulated to that point
    (prefixed with a bold model-name header), not an individual delta.
    Falls back to the HF_TOKEN env var when no token is supplied.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    if hf_token is None or not hf_token.strip():
        hf_token = os.getenv("HF_TOKEN")
    client = InferenceClient(model=model, token=hf_token)
    tokens = f"**`{model_name}`**\n\n"
    for completion in client.chat_completion(messages, max_tokens=max_new_tokens, stream=True):
        token = completion.choices[0].delta.content
        # The final streamed chunk (and occasionally others) can carry
        # delta.content == None; concatenating it would raise TypeError.
        if token:
            tokens += token
        yield tokens


def speech_to_text(audio):
    """Transcribe an audio file path to text via Google Speech Recognition.

    Returns the transcript, or a human-readable error message on failure.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                text = "Google Speech Recognition could not understand the audio"
            except sr.RequestError as e:
                text = f"Could not request results from Google Speech Recognition service; {e}"
    except Exception:
        # Best-effort: surface a friendly message instead of crashing the UI.
        text = "Could not process the audio, please try to record one more time"
    return text


def respond(message, chat_history, system_prompt, hf_token, model_id, max_new_tokens):
    """Generator wired to the Send button: streams the bot reply into the chat.

    Yields ("", updated_history) so the textbox clears while the last chat
    row is updated in place with each streamed chunk. `inference` already
    yields the cumulative text, so each chunk REPLACES the bot message
    (accumulating again here would duplicate prefixes), and the history row
    is appended once, not once per chunk.
    """
    chat_history = chat_history + [(message, "")]
    model_name = model_id.split("/")[-1]
    for partial in inference(message, hf_token, model_id, model_name, max_new_tokens):
        chat_history[-1] = (message, partial)
        yield "", chat_history


def clear_chat(chat_history):
    """Reset the chatbot to an empty history."""
    return []


def transcribe_audio(audio):
    """Adapter for the voice button: audio file path -> transcript string."""
    return speech_to_text(audio)


def llm_ui():
    """Build the text-only LLM chat tab."""
    with gr.Blocks() as demo:
        # Canonical Hub org for Mistral is lowercase "mistralai";
        # "Mistralai/..." does not resolve as a repo id.
        model_ids_llm = [
            "mistralai/Mistral-7B-Instruct-v0.2",
            "meta-llama/Meta-Llama-3-8B-Instruct",
            "meta-llama/Meta-Llama-3.1-8B-Instruct",
        ]
        gr.Markdown("# AI Coding Assistant")
        with gr.Row():
            chatbot = gr.Chatbot()
        hf_token_box = gr.Textbox(
            lines=1,
            placeholder="Check if you have access to selected model",
            label="Hugging Face Token - Required",
            type="password",
        )
        with gr.Group():
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="Type your coding problem here...",
                    label="User Input",
                    show_label=False,
                    scale=8,
                )
                send_button = gr.Button("Send", scale=2, variant="primary")
            with gr.Row():
                model_selection = gr.Dropdown(
                    choices=model_ids_llm, value=model_ids_llm[0], label="Model", scale=3
                )
                clear_button = gr.Button("Clear Chat", scale=2)
            with gr.Row():
                voice_input = gr.Microphone(type="filepath", label="Voice Input", scale=7)
                voice_button = gr.Button("Use Audio as User Input", scale=3)
            voice_button.click(transcribe_audio, inputs=voice_input, outputs=user_input)
            max_new_tokens_slider = gr.Slider(
                minimum=50,
                maximum=2000,
                value=500,
                step=10,
                label="Max New Tokens",
                info="Maximum number of tokens to generate in the response.",
            )
        # Wrap system_prompt in a Gradio component so it can be passed as an input.
        system_prompt_component = gr.State(value=system_prompt)
        send_button.click(
            respond,
            [user_input, chatbot, system_prompt_component, hf_token_box, model_selection, max_new_tokens_slider],
            [user_input, chatbot],
            scroll_to_output=True,
        )
        clear_button.click(clear_chat, [chatbot], [chatbot])
    return demo


def multimodal_llm_ui():
    """Build the image+LLM chat tab (model choices are placeholders)."""
    with gr.Blocks() as demo:
        model_ids_multimodal = ["Model-1", "Model-2", "Model-3"]
        gr.Markdown("# Coding Vision Model")
        with gr.Row():
            chatbot = gr.Chatbot(height=550)
        with gr.Column():
            hf_token_box = gr.Textbox(
                lines=1,
                placeholder="Check if you have access to selected model",
                label="Hugging Face Token - Required",
                type="password",
            )
            image_input = gr.Image(type="filepath", label="Input your Image Here....")
        with gr.Group():
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="Type your problem here...",
                    label="User Input",
                    show_label=False,
                    scale=8,
                )
                send_button = gr.Button("Send", scale=2, elem_id="send-button", variant="primary")
            with gr.Row():
                model_selection = gr.Dropdown(
                    choices=model_ids_multimodal,
                    value=model_ids_multimodal[0],
                    label="Select Model",
                    scale=3,
                )
                clear_button = gr.Button("Clear Chat", scale=2)
            with gr.Row():
                voice_input = gr.Microphone(type="filepath", label="Voice Input", scale=7)
                voice_button = gr.Button("Use Audio as User Input", scale=3)
            voice_button.click(transcribe_audio, inputs=voice_input, outputs=user_input)
            max_new_tokens_slider = gr.Slider(
                minimum=50,
                maximum=2000,
                value=500,
                step=10,
                label="Max New Tokens",
                info="Maximum number of tokens to generate in the response.",
            )
        # Wrap system_prompt in a Gradio component so it can be passed as an input.
        system_prompt_component = gr.State(value=system_prompt)
        send_button.click(
            respond,
            [user_input, chatbot, system_prompt_component, hf_token_box, model_selection, max_new_tokens_slider],
            [user_input, chatbot],
        )
        clear_button.click(clear_chat, [chatbot], [chatbot])
    return demo


demo = gr.TabbedInterface([llm_ui(), multimodal_llm_ui()], ["LLM", "Image + LLM"])

if __name__ == "__main__":
    # Guarded so importing this module does not block on the server loop.
    demo.launch()