# Earlier experiment: loading a local GGML model through GPT4All instead of transformers.
# from gpt4all import GPT4All
# model = GPT4All("wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin")
# ----------------------------------------------------------------------------------------------------------------------------

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, logging

# Path to the model directory (assuming it's in the same directory as this script).
model_directory = "./"

# Load the model and tokenizer. from_tf=True loads a TensorFlow checkpoint into a
# PyTorch model; drop it if the directory already holds native PyTorch weights.
model = AutoModelForCausalLM.from_pretrained(model_directory, from_tf=True)
tokenizer = AutoTokenizer.from_pretrained(model_directory, trust_remote_code=True)

# Suppress transformers warnings.
logging.set_verbosity(logging.CRITICAL)

# ----------------------------------------------------------------------------------------------------------------------------
# Alternative: run generation through a text-generation pipeline instead of calling
# model.generate() directly.
# from transformers import pipeline
# prompt = "What is a large language model?"
# pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
# result = pipe(f"[INST] {prompt} [/INST]")
# print(result[0]['generated_text'])
# ----------------------------------------------------------------------------------------------------------------------------


def generate_text(prompt):
    # Encode the prompt, generate up to 200 tokens total, and decode the result.
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    output = model.generate(input_ids, max_length=200, num_return_sequences=1)
    result = tokenizer.decode(output[0], skip_special_tokens=True)
    return result


text_generation_interface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Generated Text"),
    title="Local LLM Text Generation",
)
text_generation_interface.launch()
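
# ----------------------------------------------------------------------------------------------------------------------------
# Optional: run the model on a GPU when one is available. A minimal sketch, assuming a
# PyTorch backend; place it right after from_pretrained() and move input_ids onto the
# same device inside generate_text (input_ids = input_ids.to(device)).
# import torch
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model = model.to(device)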
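
# Optional: a sampling-based variant of generate_text. A sketch, not part of the original
# script; generate_text_sampled and its temperature/top_p values are illustrative
# assumptions. Pass it to gr.Interface(fn=...) in place of generate_text to try it.
# def generate_text_sampled(prompt):
#     inputs = tokenizer(prompt, return_tensors="pt")
#     output = model.generate(
#         **inputs,
#         max_new_tokens=200,  # cap newly generated tokens rather than total length
#         do_sample=True,  # sample instead of greedy decoding
#         temperature=0.7,  # assumed value; tune per model
#         top_p=0.9,  # nucleus-sampling cutoff (assumed)
#         pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
#     )
#     return tokenizer.decode(output[0], skip_special_tokens=True)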