from langchain.llms import CTransformers
from langchain.chains import LLMChain 
from langchain import PromptTemplate
import os
import io
import gradio as gr
import time

custom_prompt_template = """
You are an AI coding assistant and your task is to resolve coding issues and return code snippets for the same based on the user's given query.
Query : {query}
You just return the helpful code and the related details.
Helpful code and related details:
"""

def set_custom_prompt():
    prompt=PromptTemplate(
        template = custom_prompt_template,
        input_variables = ['query']
    )

    return prompt

def load_model():
    llm = CTransformers(
        model = 'codellama-7b-instruct.ggmlv3.Q4_0.bin',
        model_type = 'llama',
        max_new_tokens = 1096,
        temperature = 0.2,
        repetition_penalty = 1.13,
        #gpu_layers = 3
    )
    return llm

def chain_pipeline():
    llm = load_model()
    qa_prompt = set_custom_prompt()
    qa_chain = LLMChain(
        prompt = qa_prompt,
        llm = llm
    )
    return qa_chain

llmchain = chain_pipeline()

def bot(query):
    llm_response = llmchain.run({"query":query})
    return llm_response

with gr.Blocks(title="Code Llama Srini") as demo:
    gr.Markdown("# Code Llama Demo")
    chatbot = gr.Chatbot([], elem_id="chatbot",height=700)
    msg = gr.Textbox()
    clear = gr.ClearButton([msg,chatbot])


    def respond(message,chat_history):
        bot_message = bot(message)
        chat_history.append((message,bot_message))
        time.sleep(2)
        return "", chat_history

    msg.submit(respond,[msg,chatbot],[msg,chatbot])


demo.launch()