wizardlm_api / app.py
DR-Rakshitha's picture
Update app.py
319b4d3
raw
history blame
1.72 kB
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Specify the path to your fine-tuned model and tokenizer
# model_path = "./" # Assuming the model is in the same directory as your notebook
# model_name = "https://huggingface.co/spaces/DR-Rakshitha/wizardlm_api/blob/main/pytorch_model-00001-of-00002.bin" # Replace with your model name
from llama_cpp import Llama
import timeit
# Load the model through llama.cpp.
#   n_ctx   - size of the context window, in tokens
#   n_batch - maximum batch size used while processing the prompt
# NOTE(review): llama.cpp reads GGML/GGUF weight files; the file name below
# looks like a PyTorch checkpoint shard -- confirm it is in a format
# llama.cpp can actually load.
llm = Llama(
    model_path="./pytorch_model-00001-of-00002.bin",
    n_ctx=512,
    n_batch=128,
)

# Start timer (taken right after the model is loaded; nothing in this file
# reads it afterwards).
start = timeit.default_timer()

# This script used to call AutoModelForCausalLM.from_pretrained() and
# AutoTokenizer.from_pretrained() here with `model_path`, a name whose only
# assignment is commented out, so start-up died with NameError before the
# interface was created. Text generation goes through `llm` only, so the
# unused transformers model/tokenizer load has been removed.
# Define the function for text generation
def generate_text(input_text: str) -> str:
    """Generate a completion for *input_text* with the module-level ``llm``.

    Args:
        input_text: Prompt text to complete.

    Returns:
        The generated text of the first completion choice. The prompt itself
        is not included because the call uses ``echo=False``.
    """
    output = llm(
        input_text,
        max_tokens=-1,  # no explicit cap; length is bounded by the context size
        echo=False,
        temperature=0.1,
        top_p=0.9,
    )
    # The completion comes back as an OpenAI-style dict. Without this return
    # the function yielded None and the caller received no text at all.
    return output["choices"][0]["text"]
# Create the Gradio interface
# Single text box in, single text box out, backed by generate_text().
# Components are taken from the top-level gradio namespace: the
# gr.inputs / gr.outputs namespaces are deprecated in Gradio 3.x and no
# longer exist in Gradio 4.x.
# NOTE(review): the title says "GPT-4" while the model is loaded through
# llama.cpp -- confirm the wording is intended.
text_generation_interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Input Text"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="GPT-4 Text Generation",
)
# Launch the Gradio interface
text_generation_interface.launch()