import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from gpt4all import GPT4All

# Module-level model instance, constructed once at import time and shared by
# every call to generate_text below.
# NOTE(review): GPT4All presumably resolves this file name against its local
# model directory (and may download it if absent) — confirm for the pinned
# gpt4all version.
model = GPT4All("wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin")

# model = AutoModelForCausalLM.from_pretrained(
#     "tiiuae/falcon-7b-instruct",
#     torch_dtype=torch.bfloat16,
#     trust_remote_code=True,
#     device_map="auto",
#     low_cpu_mem_usage=True,
# )
# tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")


def generate_text(input_text):
    """Generate a completion for ``input_text`` with the module-level model.

    The prompt is forwarded unchanged to ``model.generate`` with that
    method's default settings, and whatever it returns is handed back to the
    caller as-is (no prompt-echo stripping or other post-processing).
    """
    return model.generate(input_text)


# Single-textbox UI: the submitted prompt is passed to generate_text and the
# value it returns is shown in the output textbox.
#
# Components come from the top-level ``gr.Textbox`` rather than the deprecated
# ``gr.inputs`` namespace (which was also being used for the *output* field).
text_generation_interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Input Text"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    # Title names the model actually loaded above, not the previously used
    # Falcon checkpoint.
    title="WizardLM-13B v1.1 (GPT4All)",
)

# Launch separately so the name above stays bound to the Interface object
# instead of to launch()'s return value.
text_generation_interface.launch()