modified the chunk limit and added error handling and caching of the model
app.py CHANGED
@@ -6,6 +6,8 @@ import torch
 from threading import Thread
 import logging
 import spaces
+from functools import lru_cache
+
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
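For context on the new import: functools.lru_cache memoizes a function by its arguments, so a zero-argument loader runs its body exactly once per process and every later call returns the cached result. A minimal, self-contained sketch of that behavior (the names here are illustrative, not from app.py):

from functools import lru_cache

@lru_cache(maxsize=1)
def expensive_setup():
    # Runs only on the first call; later calls are cache hits.
    print("running setup")
    return object()

a = expensive_setup()  # prints "running setup"
b = expensive_setup()  # no print: served from the cache
assert a is b          # the very same object comes back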
@@ -48,20 +50,31 @@ bnb_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.bfloat16
 )
 
+@lru_cache(maxsize=1)
+def load_model_and_tokenizer():
+    try:
+        start_time = time.time()
+        logger.info("Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        logger.info("Loading model...")
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map="auto",
+            quantization_config=bnb_config,
+            torch_dtype=torch.bfloat16
+        )
+        model.generation_config.pad_token_id = tokenizer.pad_token_id
+        end_time = time.time()
+        logger.info(f"Model and tokenizer loaded successfully in {end_time - start_time} seconds.")
+        return model, tokenizer
+    except Exception as e:
+        logger.error(f"Error loading model or tokenizer: {e}")
+        raise
+
 try:
-
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    logger.info("Loading model...")
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        quantization_config=bnb_config,
-        torch_dtype=torch.bfloat16
-    )
-    model.generation_config.pad_token_id = tokenizer.pad_token_id
-    logger.info("Model and tokenizer loaded successfully.")
+    model, tokenizer = load_model_and_tokenizer()
 except Exception as e:
-    logger.error(f"Error loading model or tokenizer: {e}")
+    logger.error(f"Failed to load model and tokenizer: {e}")
     raise
 
 terminators = [
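Two notes on the hunk above. First, the new body calls time.time(), but no `import time` is visible in this diff, so it presumably already exists elsewhere in app.py. Second, because the loader is wrapped in lru_cache(maxsize=1), every call after the first is a cache hit that hands back the identical objects. A short usage sketch against the function above:

# Second and later calls are cache hits and return the identical objects.
model_a, tokenizer_a = load_model_and_tokenizer()
model_b, tokenizer_b = load_model_and_tokenizer()
assert model_a is model_b and tokenizer_a is tokenizer_b

# lru_cache exposes its statistics; after the two calls above this
# prints something like CacheInfo(hits=1, misses=1, maxsize=1, currsize=1).
print(load_model_and_tokenizer.cache_info())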
@@ -76,7 +89,7 @@ Bad JSON example: {'lobby': { 'frcm': { 'replace': [ 'carpet', 'carpet_pad', 'ba
 Make sure to fetch details from the provided text and ignore unnecessary information. The response should be in JSON format only, without any additional comments.
 """
 
-def chunk_text(text, chunk_size=
+def chunk_text(text, chunk_size=5000):
     """
     Splits the input text into chunks of specified size.
 
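The diff only shows the signature and the start of the docstring, so the function body is not visible here. A minimal sketch of what a fixed-size chunker typically looks like, assuming plain character slicing (app.py's real implementation may differ):

def chunk_text(text, chunk_size=5000):
    """Splits the input text into chunks of specified size."""
    # Assumed body: fixed-width character slices; the real implementation
    # is not visible in this diff. The last chunk may be shorter.
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

print([len(c) for c in chunk_text("x" * 12000)])  # [5000, 5000, 2000]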
@@ -185,7 +198,7 @@ with gr.Blocks(fill_height=True, css=css) as demo:
         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
         additional_inputs=[
             gr.Slider(minimum=0, maximum=1, step=0.1, value=0.95, label="Temperature", render=False),
-            gr.Slider(minimum=128, maximum=
+            gr.Slider(minimum=128, maximum=2000, step=1, value=700, label="Max new tokens", render=False),
         ]
     )
 
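With the new bounds, "Max new tokens" now goes up to 2000 with a default of 700. In gr.ChatInterface, each additional_inputs component is passed to the handler as an extra positional argument after message and history. A self-contained sketch of that wiring, with a stand-in echo handler in place of the real model call:

import gradio as gr

def respond(message, history, temperature, max_new_tokens):
    # Stand-in for the real generation call; the slider values arrive
    # as extra positional arguments after (message, history).
    return f"[temp={temperature}, max_new_tokens={max_new_tokens}] {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
    additional_inputs=[
        gr.Slider(minimum=0, maximum=1, step=0.1, value=0.95, label="Temperature", render=False),
        gr.Slider(minimum=128, maximum=2000, step=1, value=700, label="Max new tokens", render=False),
    ],
)

demo.launch()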