# Hugging Face Space — deployment status at time of capture: Runtime error.
# --- Model setup for the text-generation Space ---------------------------
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from llama_cpp import Llama
import timeit

# Path to the fine-tuned model and tokenizer files (root of this Space repo).
# BUG FIX: this name was previously only defined inside a comment, so the
# from_pretrained() calls below raised NameError at import time.
model_path = "./"

# Load weights with llama.cpp for CPU inference.
# NOTE(review): the file name suggests a sharded PyTorch checkpoint, not a
# llama.cpp-compatible (GGML/GGUF) binary — confirm the weight format
# actually matches this loader.
llm = Llama(model_path="./pytorch_model-00001-of-00002.bin",
            n_ctx=512,     # context window, in tokens
            n_batch=128)   # prompt-evaluation batch size

# Timer start, for measuring load/generation latency.
start = timeit.default_timer()

# Also load the Hugging Face model and tokenizer from the local path.
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
| # Define the function for text generation | |
def generate_text(input_text):
    """Generate a completion for *input_text* using the llama.cpp model.

    Parameters
    ----------
    input_text : str
        The user's prompt, passed in from the Gradio textbox.

    Returns
    -------
    str
        The generated completion text.

    BUG FIX: the original body computed ``output`` but returned nothing,
    so the UI's output box always received ``None``.
    """
    output = llm(input_text,
                 max_tokens=-1,    # -1: generate until EOS / context limit
                 echo=False,       # do not echo the prompt back
                 temperature=0.1,  # near-greedy sampling
                 top_p=0.9)
    # llama_cpp returns an OpenAI-style completion dict; extract the text.
    return output["choices"][0]["text"]
| # Create the Gradio interface | |
# Build the Gradio UI.
# BUG FIX: gr.inputs.Textbox / gr.outputs.Textbox were deprecated in
# Gradio 3.x and removed in 4.x — the component classes now live directly
# on the gr namespace.
text_generation_interface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Generated Text"),
    title="GPT-4 Text Generation",
)

# Launch the web app (blocks until the server is stopped).
text_generation_interface.launch()