"""Gradio web UI that serves text completions from a local llama.cpp model.

On import the module loads the model once into ``llm`` and builds
``text_generation_interface``; the web server is started only when the file is
executed as a script.
"""

import timeit  # noqa: F401  (currently unused; kept so existing imports are not dropped)

import gradio as gr
from llama_cpp import Llama
from transformers import AutoModelForCausalLM, AutoTokenizer  # noqa: F401  (unused; see note below)

# NOTE(review): the file name looks like a sharded PyTorch checkpoint, while
# llama-cpp-python expects a GGML/GGUF model file -- confirm this path points
# at a converted model, otherwise ``Llama(...)`` will fail at startup.
MODEL_PATH = "./pytorch_model-00001-of-00002.bin"

# Load the model once at start-up so every request reuses the same instance.
# n_ctx is the context window (prompt + completion) in tokens.
llm = Llama(model_path=MODEL_PATH, n_ctx=512, n_batch=128)

# NOTE: the previous revision also called
# ``AutoModelForCausalLM.from_pretrained(model_path)`` and
# ``AutoTokenizer.from_pretrained(model_path)`` with an undefined
# ``model_path`` (NameError at start-up). Neither object was used for
# generation, so those calls were removed.


def generate_text(input_text: str) -> str:
    """Return the model's completion for *input_text*.

    Args:
        input_text: Prompt typed by the user in the Gradio text box.

    Returns:
        The generated continuation (the prompt itself is not echoed), or an
        empty string when the prompt is empty or only whitespace.
    """
    # Nothing to complete; skip the (slow) model call entirely.
    if not input_text or not input_text.strip():
        return ""

    output = llm(
        input_text,
        max_tokens=-1,  # no explicit cap; generation is bounded by n_ctx
        echo=False,
        temperature=0.1,
        top_p=0.9,
    )
    # llama-cpp-python returns an OpenAI-style completion dict; the generated
    # text lives in the first choice.
    return output["choices"][0]["text"]


# Build the Gradio interface. ``gr.Textbox`` replaces the legacy
# ``gr.inputs.Textbox`` / ``gr.outputs.Textbox`` namespaces, which were
# deprecated in Gradio 3 and removed in Gradio 4.
# NOTE(review): the title says "GPT-4" but the app serves a local llama.cpp
# model -- consider renaming.
text_generation_interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Input Text"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="GPT-4 Text Generation",
)

if __name__ == "__main__":
    # Start the web server only when run as a script, not on import.
    text_generation_interface.launch()