first app version
- .beamignore +28 -0
- .gitattributes +0 -35
- .gitignore +8 -0
- README.md +1 -0
- app.py +124 -0
- index.html +0 -19
- style.css +0 -28
- utils.py +36 -0
.beamignore ADDED
@@ -0,0 +1,28 @@
+# Generated by Beam SDK
+.beamignore
+pyproject.toml
+.git
+.idea
+.python-version
+.vscode
+.venv
+venv
+__pycache__
+.DS_Store
+.config
+drive/MyDrive
+.coverage
+.pytest_cache
+.ipynb
+.ruff_cache
+.dockerignore
+.ipynb_checkpoints
+.env.local
+.envrc
+**/__pycache__/
+**/.pytest_cache/
+**/node_modules/
+**/.venv/
+*.pyc
+.next/
+.circleci
.gitattributes DELETED
@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,8 @@
+.DS_Store
+__pycache__
+__downloads__
+.env
+.venv/
+.vscode/launch.json
+.secrets
+.idea/
README.md CHANGED
@@ -4,6 +4,7 @@ emoji: 🚀
 colorFrom: indigo
 colorTo: pink
 sdk: static
+app_file: README.md
 pinned: false
 ---
 
app.py ADDED
@@ -0,0 +1,124 @@
+from threading import Thread
+
+import torch
+from beam import Image, Volume, GpuType, asgi
+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse
+from transformers import (
+    AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer,
+    PreTrainedTokenizerFast, PreTrainedModel, StoppingCriteriaList
+)
+
+from utils import MaxPostsStoppingCriteria, Body, fallback
+
+SETTINGS = {
+    "model_name": "Error410/JVCGPT-Medium",
+    "beam_volume_path": "./cached_models",
+}
+
+# @see https://huggingface.co/docs/transformers/generation_strategies#customize-text-generation
+DEFAULTS = {
+    "max_length": 2048,  # 512
+    "temperature": 0.9,  # 1
+    "top_p": 1,  # 0.95
+    "top_k": 0,  # 40
+    "repetition_penalty": 1.0,  # 1.0
+    "no_repeat_ngram_size": 0,  # 0
+    "do_sample": True,  # True
+}
+
+
+def load_models():
+    tokenizer = AutoTokenizer.from_pretrained(
+        SETTINGS["model_name"],
+        cache_dir=SETTINGS["beam_volume_path"]
+    )
+    tokenizer.pad_token = tokenizer.eos_token
+    model = AutoModelForCausalLM.from_pretrained(
+        SETTINGS["model_name"],
+        device_map="auto",
+        torch_dtype=torch.float16,
+        cache_dir=SETTINGS["beam_volume_path"],
+    )
+    return model, tokenizer
+
+
+def stream(model: PreTrainedModel, tokenizer: PreTrainedTokenizerFast, body: Body):
+    generate_args = {
+        "max_length": fallback(body.max_length, DEFAULTS["max_length"]),
+        "temperature": fallback(body.temperature, DEFAULTS["temperature"]),
+        "top_p": fallback(body.top_p, DEFAULTS["top_p"]),
+        "top_k": fallback(body.top_k, DEFAULTS["top_k"]),
+        "repetition_penalty": fallback(body.repetition_penalty, DEFAULTS["repetition_penalty"]),
+        "no_repeat_ngram_size": fallback(body.no_repeat_ngram_size, DEFAULTS["no_repeat_ngram_size"]),
+        "do_sample": fallback(body.do_sample, DEFAULTS["do_sample"]),
+        "use_cache": True,
+        "eos_token_id": tokenizer.eos_token_id,
+        "pad_token_id": tokenizer.pad_token_id,
+    }
+
+    inputs = tokenizer(body.prompt, return_tensors="pt", padding=True)
+    input_ids = inputs["input_ids"].to("cuda")
+    attention_mask = inputs["attention_mask"].to("cuda")
+
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False, timeout=240)
+
+    # with torch.no_grad():  # seems to be useless
+    thread = Thread(
+        target=model.generate,
+        kwargs={
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "streamer": streamer,
+            "stopping_criteria": StoppingCriteriaList([MaxPostsStoppingCriteria(tokenizer, body.posts_count)]),
+            **generate_args,
+        }
+    )
+    thread.start()
+
+    for token in streamer:
+        yield token
+        # if len(token) > 0:
+        #     yield f"DATA {token}"
+        #
+    # yield "EOS"
+
+
+@asgi(
+    on_start=load_models,
+    cpu=2.0,
+    memory="16Gi",
+    gpu=GpuType.A100_40,
+    gpu_count=1,
+    timeout=900,  # Time for loading the model and running the server
+    image=Image(
+        python_version="python3.12",
+        python_packages=[
+            "fastapi",
+            "torch",
+            "transformers",
+            "accelerate",
+            "huggingface_hub[hf-transfer]",
+        ],
+        env_vars=["HF_HUB_ENABLE_HF_TRANSFER=1"],
+    ),
+    volumes=[
+        Volume(
+            name="cached_models",
+            mount_path=SETTINGS["beam_volume_path"],
+        )
+    ],
+)
+def server(context):
+    model, tokenizer = context.on_start_value
+    app = FastAPI()
+
+    @app.post("/stream")
+    async def stream_endpoint(body: Body) -> StreamingResponse:
+        return StreamingResponse(
+            stream(model, tokenizer, body),
+            media_type="text/event-stream",
+            headers={"Cache-Control": "no-cache"},
+        )
+
+    return app
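
For reference, a minimal client sketch for the /stream endpoint above. This is an illustration, not part of the commit: the deployment URL and auth token below are hypothetical placeholders (Beam assigns these once the app is deployed, e.g. with something like `beam deploy app.py:server`), and the JSON fields mirror the Body model added in utils.py, so any omitted sampling field falls back to DEFAULTS on the server.

import requests  # client-side only; not a dependency of the app itself

BEAM_URL = "https://example.app.beam.cloud"  # placeholder: your deployment URL
BEAM_TOKEN = "..."                           # placeholder: your Beam auth token

response = requests.post(
    f"{BEAM_URL}/stream",
    headers={"Authorization": f"Bearer {BEAM_TOKEN}"},
    json={
        "prompt": "Hello",   # prompt string fed straight to the tokenizer
        "posts_count": 3,    # stop after three <|end_of_post|> markers
        "temperature": 0.9,  # optional; omitted fields use DEFAULTS
    },
    stream=True,
    timeout=300,
)
response.raise_for_status()

# Tokens arrive incrementally thanks to the server's StreamingResponse.
for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
    print(chunk, end="", flush=True)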
index.html DELETED
@@ -1,19 +0,0 @@
-<!doctype html>
-<html>
-  <head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width" />
-    <title>My static Space</title>
-    <link rel="stylesheet" href="style.css" />
-  </head>
-  <body>
-    <div class="card">
-      <h1>Welcome to your static Space!</h1>
-      <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-      <p>
-        Also don't forget to check the
-        <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-      </p>
-    </div>
-  </body>
-</html>
style.css DELETED
@@ -1,28 +0,0 @@
-body {
-  padding: 2rem;
-  font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
-}
-
-h1 {
-  font-size: 16px;
-  margin-top: 0;
-}
-
-p {
-  color: rgb(107, 114, 128);
-  font-size: 15px;
-  margin-bottom: 10px;
-  margin-top: 5px;
-}
-
-.card {
-  max-width: 620px;
-  margin: 0 auto;
-  padding: 16px;
-  border: 1px solid lightgray;
-  border-radius: 16px;
-}
-
-.card p:last-child {
-  margin-bottom: 0;
-}
utils.py ADDED
@@ -0,0 +1,36 @@
+from pydantic import BaseModel
+from transformers import (PreTrainedTokenizerFast, StoppingCriteria)
+
+
+def fallback(value, fallback_value):
+    if value is None:
+        return fallback_value
+    return value
+
+
+class Body(BaseModel):
+    prompt: str
+    posts_count: int
+    max_length: int | None = None
+    temperature: float | None = None
+    top_p: float | None = None
+    top_k: float | None = None
+    repetition_penalty: float | None = None
+    no_repeat_ngram_size: float | None = None
+    do_sample: bool | None = None
+
+
+class MaxPostsStoppingCriteria(StoppingCriteria):
+    def __init__(self, tokenizer: PreTrainedTokenizerFast, posts_count: int):
+        self.end_of_post_token_id = tokenizer.encode("<|end_of_post|>", add_special_tokens=False)
+        self.posts_count = posts_count
+        self.counter = 0
+
+    def __call__(self, input_ids, scores, **kwargs):
+        # Check if the last token matches the <|end_of_post|> token ID
+        for sequence in input_ids:
+            if sequence[-len(self.end_of_post_token_id):].tolist() == self.end_of_post_token_id:
+                self.counter += 1
+                if self.counter >= self.posts_count:
+                    return True
+        return False
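
A quick way to sanity-check the stopping criterion in isolation, as a sketch: the input batch is built so its last tokens are exactly the encoded <|end_of_post|> marker, using the same add_special_tokens=False encoding the class itself relies on. Loading the Error410/JVCGPT-Medium tokenizer (and that tokenizer encoding the marker sensibly) is an assumption here.

import torch
from transformers import AutoTokenizer

from utils import MaxPostsStoppingCriteria

tokenizer = AutoTokenizer.from_pretrained("Error410/JVCGPT-Medium")  # assumed available
criteria = MaxPostsStoppingCriteria(tokenizer, posts_count=1)

# Build a sequence that ends with the marker by construction, so the
# suffix comparison in __call__ must match on the first invocation.
marker = tokenizer.encode("<|end_of_post|>", add_special_tokens=False)
ids = tokenizer.encode("Hello", add_special_tokens=False) + marker
input_ids = torch.tensor([ids])  # batch of one sequence

assert criteria(input_ids, scores=None)  # counter reaches posts_count -> stop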