Spaces:
Sleeping
Sleeping
fix runtime error
Browse files
- Dockerfile +3 -2
- app.py +19 -3
Dockerfile
CHANGED
|
@@ -14,11 +14,12 @@ RUN pip cache purge && pip install --no-cache-dir -r requirements.txt
|
|
| 14 |
# Copy source code
|
| 15 |
COPY app.py .
|
| 16 |
|
| 17 |
-
# Create cache
|
| 18 |
-
RUN mkdir -p /app/cache && chmod -R 777 /app/cache
|
| 19 |
|
| 20 |
# Set env vars
|
| 21 |
ENV HF_HOME=/app/cache \
|
|
|
|
| 22 |
PYTHONUNBUFFERED=1 \
|
| 23 |
PYTHONWARNINGS=ignore::FutureWarning,ignore::UserWarning:torch._utils
|
| 24 |
|
|
|
|
| 14 |
# Copy source code
|
| 15 |
COPY app.py .
|
| 16 |
|
| 17 |
+
# Create cache directories and set permissions
|
| 18 |
+
RUN mkdir -p /app/cache /app/cache/matplotlib && chmod -R 777 /app/cache
|
| 19 |
|
| 20 |
# Set env vars
|
| 21 |
ENV HF_HOME=/app/cache \
|
| 22 |
+
MPLCONFIGDIR=/app/cache/matplotlib \
|
| 23 |
PYTHONUNBUFFERED=1 \
|
| 24 |
PYTHONWARNINGS=ignore::FutureWarning,ignore::UserWarning:torch._utils
|
| 25 |
|
app.py
CHANGED
|
@@ -15,6 +15,11 @@ except Exception as e:
|
|
| 15 |
print(f"Error loading model: {e}")
|
| 16 |
raise e
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
# Set device
|
| 19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 20 |
model.to(device)
|
|
@@ -26,13 +31,24 @@ print(f"Memory allocated: {torch.cuda.memory_allocated(device)/1e9:.2f} GB" if t
|
|
| 26 |
|
| 27 |
def generate_text(prompt, max_length=100, temperature=1.0):
|
| 28 |
try:
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
outputs = model.generate(
|
| 31 |
-
inputs,
|
|
|
|
| 32 |
max_length=max_length,
|
| 33 |
temperature=temperature,
|
| 34 |
do_sample=True,
|
| 35 |
-
num_beams=1
|
|
|
|
| 36 |
)
|
| 37 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 38 |
except Exception as e:
|
|
|
|
| 15 |
print(f"Error loading model: {e}")
|
| 16 |
raise e
|
| 17 |
|
| 18 |
+
# Set pad_token_id to eos_token_id if not set
|
| 19 |
+
if tokenizer.pad_token_id is None:
|
| 20 |
+
tokenizer.pad_token_id = tokenizer.eos_token_id
|
| 21 |
+
model.config.pad_token_id = tokenizer.eos_token_id
|
| 22 |
+
|
| 23 |
# Set device
|
| 24 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 25 |
model.to(device)
|
|
|
|
| 31 |
|
| 32 |
def generate_text(prompt, max_length=100, temperature=1.0):
|
| 33 |
try:
|
| 34 |
+
# Encode input with attention mask
|
| 35 |
+
inputs = tokenizer(
|
| 36 |
+
prompt,
|
| 37 |
+
return_tensors="pt",
|
| 38 |
+
padding=True,
|
| 39 |
+
truncation=True,
|
| 40 |
+
max_length=max_length
|
| 41 |
+
).to(device)
|
| 42 |
+
|
| 43 |
+
# Generate text
|
| 44 |
outputs = model.generate(
|
| 45 |
+
input_ids=inputs["input_ids"],
|
| 46 |
+
attention_mask=inputs["attention_mask"],
|
| 47 |
max_length=max_length,
|
| 48 |
temperature=temperature,
|
| 49 |
do_sample=True,
|
| 50 |
+
num_beams=1,
|
| 51 |
+
pad_token_id=tokenizer.pad_token_id
|
| 52 |
)
|
| 53 |
return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 54 |
except Exception as e:
|