Spaces:
Runtime error
Runtime error
Commit
·
319b4d3
1
Parent(s):
f4b0962
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,8 +2,29 @@ import gradio as gr
|
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 3 |
|
| 4 |
# Specify the path to your fine-tuned model and tokenizer
|
| 5 |
-
model_path = "./" # Assuming the model is in the same directory as your notebook
|
| 6 |
-
model_name = "https://huggingface.co/spaces/DR-Rakshitha/wizardlm_api/blob/main/pytorch_model-00001-of-00002.bin" # Replace with your model name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# Load the model and tokenizer
|
| 9 |
model = AutoModelForCausalLM.from_pretrained(model_path)
|
|
@@ -11,10 +32,16 @@ tokenizer = AutoTokenizer.from_pretrained(model_path)
|
|
| 11 |
|
| 12 |
# Define the function for text generation
|
| 13 |
def generate_text(input_text):
|
| 14 |
-
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
|
| 15 |
-
output = model.generate(input_ids, max_length=50, num_return_sequences=1)
|
| 16 |
-
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
| 17 |
-
return generated_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# Create the Gradio interface
|
| 20 |
text_generation_interface = gr.Interface(
|
|
|
|
| 2 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 3 |
|
| 4 |
# Specify the path to your fine-tuned model and tokenizer
|
| 5 |
+
# model_path = "./" # Assuming the model is in the same directory as your notebook
|
| 6 |
+
# model_name = "https://huggingface.co/spaces/DR-Rakshitha/wizardlm_api/blob/main/pytorch_model-00001-of-00002.bin" # Replace with your model name
|
| 7 |
+
|
| 8 |
+
from llama_cpp import Llama
|
| 9 |
+
import timeit
|
| 10 |
+
|
| 11 |
+
# Load Llama 2 model
|
| 12 |
+
llm = Llama(model_path="./pytorch_model-00001-of-00002.bin",
|
| 13 |
+
n_ctx=512,
|
| 14 |
+
n_batch=128)
|
| 15 |
+
|
| 16 |
+
# Start timer
|
| 17 |
+
start = timeit.default_timer()
|
| 18 |
+
|
| 19 |
+
# Generate LLM response
|
| 20 |
+
# prompt = "What is Python?"
|
| 21 |
+
|
| 22 |
+
# output = llm(prompt,
|
| 23 |
+
# max_tokens=-1,
|
| 24 |
+
# echo=False,
|
| 25 |
+
# temperature=0.1,
|
| 26 |
+
# top_p=0.9)
|
| 27 |
+
|
| 28 |
|
| 29 |
# Load the model and tokenizer
|
| 30 |
model = AutoModelForCausalLM.from_pretrained(model_path)
|
|
|
|
| 32 |
|
| 33 |
# Define the function for text generation
|
| 34 |
def generate_text(input_text):
    """Generate a completion for ``input_text`` using the module-level ``llm``.

    Args:
        input_text: Prompt string forwarded unchanged to the model.

    Returns:
        The text of the first completion choice produced by ``llm``.
    """
    # Sampling settings are kept exactly as in the original call.
    # NOTE(review): max_tokens=-1 appears to leave the completion length
    # bounded only by the context window configured on ``llm`` -- confirm
    # against the llama-cpp-python version in use.
    output = llm(
        input_text,
        max_tokens=-1,
        echo=False,
        temperature=0.1,
        top_p=0.9,
    )

    # The original computed ``output`` but never returned it, so callers
    # always received None. The completion is a dict shaped like an
    # OpenAI-style response; hand back only the generated text.
    return output["choices"][0]["text"]
|
| 45 |
|
| 46 |
# Create the Gradio interface
|
| 47 |
text_generation_interface = gr.Interface(
|