Set higher ZeroGPU limit for visitors.
- README.md +2 -1
- app.py +1 -1
- quiz_generator.py +2 -1
README.md CHANGED

@@ -29,5 +29,6 @@ I wanted my prototype to have 0 setup for the user if at all possible, and HF Sp
 2. Don't try to run an LLM locally on unknown hardware specs.
 3. Don't use my key for a publicly-facing app.
 
-I
+I've set the limit for visitors to 300s. That does mean that the wait for a GPU could be longer, but you'll be able to run more than 1 or 2 trials.
+
app.py CHANGED

@@ -120,4 +120,4 @@ with gr.Blocks() as demo:
         outputs=result_out,
     )
 
-demo.launch(
+demo.launch()
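For reference, a minimal sketch of how that launch call sits in a Gradio Blocks app of this shape. Only result_out appears in the diff; prompt_in, generate_btn, and the stand-in handler are hypothetical names used for illustration, not the actual contents of app.py:

import gradio as gr

with gr.Blocks() as demo:
    prompt_in = gr.Textbox(label="Prompt")    # hypothetical input component
    result_out = gr.Textbox(label="Result")   # name taken from the diff above
    generate_btn = gr.Button("Generate")      # hypothetical trigger

    generate_btn.click(
        fn=lambda p: f"(quiz generated for: {p})",  # stand-in for the real inference function
        inputs=prompt_in,
        outputs=result_out,
    )

# No arguments needed: Spaces supplies the server host/port itself, so the
# dangling `demo.launch(` is simply closed rather than configured.
demo.launch()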
quiz_generator.py CHANGED

@@ -43,11 +43,12 @@ model = AutoModelForCausalLM.from_pretrained(
 
 pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-@spaces.GPU
+@spaces.GPU(duration=300)
 def run_inference(prompt_message: str):
     """
     @spaces.GPU is a Hugging Face decorator for GPU inference.
     Required for the ZeroGPU setting in HF Spaces.
+    duration=300 allows visitors to use up to 300s of inference.
     See https://huggingface.co/docs/hub/en/spaces-zerogpu
 
     :param prompt_message: The user message submitted to the LLM