hysts HF Staff committed on
Commit
b7a23a9
·
1 Parent(s): 2c78411
Files changed (7) hide show
  1. .pre-commit-config.yaml +4 -4
  2. .python-version +1 -1
  3. README.md +2 -1
  4. app.py +69 -31
  5. pyproject.toml +13 -5
  6. requirements.txt +149 -90
  7. uv.lock +0 -0
.pre-commit-config.yaml CHANGED
@@ -1,6 +1,6 @@
1
  repos:
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
- rev: v5.0.0
4
  hooks:
5
  - id: check-executables-have-shebangs
6
  - id: check-json
@@ -14,13 +14,13 @@ repos:
14
  - id: requirements-txt-fixer
15
  - id: trailing-whitespace
16
  - repo: https://github.com/astral-sh/ruff-pre-commit
17
- rev: v0.8.6
18
  hooks:
19
- - id: ruff
20
  args: ["--fix"]
21
  - id: ruff-format
22
  - repo: https://github.com/pre-commit/mirrors-mypy
23
- rev: v1.14.1
24
  hooks:
25
  - id: mypy
26
  args: ["--ignore-missing-imports"]
 
1
  repos:
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v6.0.0
4
  hooks:
5
  - id: check-executables-have-shebangs
6
  - id: check-json
 
14
  - id: requirements-txt-fixer
15
  - id: trailing-whitespace
16
  - repo: https://github.com/astral-sh/ruff-pre-commit
17
+ rev: v0.15.4
18
  hooks:
19
+ - id: ruff-check
20
  args: ["--fix"]
21
  - id: ruff-format
22
  - repo: https://github.com/pre-commit/mirrors-mypy
23
+ rev: v1.19.1
24
  hooks:
25
  - id: mypy
26
  args: ["--ignore-missing-imports"]
.python-version CHANGED
@@ -1 +1 @@
1
- 3.10
 
1
+ 3.12
README.md CHANGED
@@ -4,7 +4,8 @@ emoji: 😻
4
  colorFrom: indigo
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.39.0
 
8
  app_file: app.py
9
  pinned: false
10
  short_description: Chatbot
 
4
  colorFrom: indigo
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 6.8.0
8
+ python_version: "3.12.12"
9
  app_file: app.py
10
  pinned: false
11
  short_description: Chatbot
app.py CHANGED
@@ -15,16 +15,14 @@ This is a demo of [`meta-llama/Llama-3.2-3B-Instruct`](https://huggingface.co/me
15
  For more details, please check [our post](https://huggingface.co/blog/llama32).
16
  """
17
 
18
- MAX_MAX_NEW_TOKENS = 2048
19
  DEFAULT_MAX_NEW_TOKENS = 1024
20
- MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
21
 
22
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
23
-
24
- model_id = "meta-llama/Llama-3.2-3B-Instruct"
25
- tokenizer = AutoTokenizer.from_pretrained(model_id)
26
  model = AutoModelForCausalLM.from_pretrained(
27
- model_id,
28
  device_map="auto",
29
  torch_dtype=torch.bfloat16,
30
  )
@@ -32,6 +30,51 @@ model.eval()
32
 
33
 
34
  @spaces.GPU(duration=90)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def generate(
36
  message: str,
37
  chat_history: list[dict],
@@ -41,33 +84,31 @@ def generate(
41
  top_k: int = 50,
42
  repetition_penalty: float = 1.2,
43
  ) -> Iterator[str]:
 
 
 
44
  conversation = [*chat_history, {"role": "user", "content": message}]
45
 
46
- input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
47
- if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
48
- input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
49
- gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
50
- input_ids = input_ids.to(model.device)
 
 
51
 
52
- streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
53
- generate_kwargs = dict(
54
- {"input_ids": input_ids},
55
- streamer=streamer,
 
 
56
  max_new_tokens=max_new_tokens,
57
- do_sample=True,
58
  top_p=top_p,
59
  top_k=top_k,
60
- temperature=temperature,
61
- num_beams=1,
62
  repetition_penalty=repetition_penalty,
63
  )
64
- t = Thread(target=model.generate, kwargs=generate_kwargs)
65
- t.start()
66
-
67
- outputs = []
68
- for text in streamer:
69
- outputs.append(text)
70
- yield "".join(outputs)
71
 
72
 
73
  demo = gr.ChatInterface(
@@ -76,7 +117,7 @@ demo = gr.ChatInterface(
76
  gr.Slider(
77
  label="Max new tokens",
78
  minimum=1,
79
- maximum=MAX_MAX_NEW_TOKENS,
80
  step=1,
81
  value=DEFAULT_MAX_NEW_TOKENS,
82
  ),
@@ -109,7 +150,6 @@ demo = gr.ChatInterface(
109
  value=1.2,
110
  ),
111
  ],
112
- stop_btn=None,
113
  examples=[
114
  ["Hello there! How are you doing?"],
115
  ["Can you explain briefly to me what is the Python programming language?"],
@@ -118,12 +158,10 @@ demo = gr.ChatInterface(
118
  ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
119
  ],
120
  cache_examples=False,
121
- type="messages",
122
  description=DESCRIPTION,
123
- css_paths="style.css",
124
  fill_height=True,
125
  )
126
 
127
 
128
  if __name__ == "__main__":
129
- demo.launch()
 
15
  For more details, please check [our post](https://huggingface.co/blog/llama32).
16
  """
17
 
18
+ MAX_NEW_TOKENS_LIMIT = 2048
19
  DEFAULT_MAX_NEW_TOKENS = 1024
20
+ MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "4096"))
21
 
22
+ MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
23
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 
24
  model = AutoModelForCausalLM.from_pretrained(
25
+ MODEL_ID,
26
  device_map="auto",
27
  torch_dtype=torch.bfloat16,
28
  )
 
30
 
31
 
32
  @spaces.GPU(duration=90)
33
+ def _generate_on_gpu(
34
+ input_ids: torch.Tensor,
35
+ max_new_tokens: int,
36
+ temperature: float,
37
+ top_p: float,
38
+ top_k: int,
39
+ repetition_penalty: float,
40
+ ) -> Iterator[str]:
41
+ input_ids = input_ids.to(model.device)
42
+
43
+ streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
44
+ generate_kwargs = {
45
+ "input_ids": input_ids,
46
+ "streamer": streamer,
47
+ "max_new_tokens": max_new_tokens,
48
+ "do_sample": True,
49
+ "top_p": top_p,
50
+ "top_k": top_k,
51
+ "temperature": temperature,
52
+ "num_beams": 1,
53
+ "repetition_penalty": repetition_penalty,
54
+ }
55
+
56
+ exception_holder: list[Exception] = []
57
+
58
+ def _generate() -> None:
59
+ try:
60
+ model.generate(**generate_kwargs)
61
+ except Exception as e: # noqa: BLE001
62
+ exception_holder.append(e)
63
+
64
+ thread = Thread(target=_generate)
65
+ thread.start()
66
+
67
+ chunks: list[str] = []
68
+ for text in streamer:
69
+ chunks.append(text)
70
+ yield "".join(chunks)
71
+
72
+ thread.join()
73
+ if exception_holder:
74
+ msg = f"Generation failed: {exception_holder[0]}"
75
+ raise gr.Error(msg)
76
+
77
+
78
  def generate(
79
  message: str,
80
  chat_history: list[dict],
 
84
  top_k: int = 50,
85
  repetition_penalty: float = 1.2,
86
  ) -> Iterator[str]:
87
+ if not message or not message.strip():
88
+ raise gr.Error("Please enter a message.")
89
+
90
  conversation = [*chat_history, {"role": "user", "content": message}]
91
 
92
+ input_ids = tokenizer.apply_chat_template(
93
+ conversation, add_generation_prompt=True, return_tensors="pt", return_dict=True
94
+ ).input_ids
95
+ n_input_tokens = input_ids.shape[1]
96
+ if n_input_tokens > MAX_INPUT_TOKENS:
97
+ msg = f"Input too long ({n_input_tokens} tokens). Maximum is {MAX_INPUT_TOKENS} tokens."
98
+ raise gr.Error(msg)
99
 
100
+ max_new_tokens = min(max_new_tokens, MAX_INPUT_TOKENS - n_input_tokens)
101
+ if max_new_tokens <= 0:
102
+ raise gr.Error("Input uses the entire context window. No room to generate new tokens.")
103
+
104
+ yield from _generate_on_gpu(
105
+ input_ids=input_ids,
106
  max_new_tokens=max_new_tokens,
107
+ temperature=temperature,
108
  top_p=top_p,
109
  top_k=top_k,
 
 
110
  repetition_penalty=repetition_penalty,
111
  )
 
 
 
 
 
 
 
112
 
113
 
114
  demo = gr.ChatInterface(
 
117
  gr.Slider(
118
  label="Max new tokens",
119
  minimum=1,
120
+ maximum=MAX_NEW_TOKENS_LIMIT,
121
  step=1,
122
  value=DEFAULT_MAX_NEW_TOKENS,
123
  ),
 
150
  value=1.2,
151
  ),
152
  ],
 
153
  examples=[
154
  ["Hello there! How are you doing?"],
155
  ["Can you explain briefly to me what is the Python programming language?"],
 
158
  ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
159
  ],
160
  cache_examples=False,
 
161
  description=DESCRIPTION,
 
162
  fill_height=True,
163
  )
164
 
165
 
166
  if __name__ == "__main__":
167
+ demo.launch(css_paths="style.css")
pyproject.toml CHANGED
@@ -3,13 +3,12 @@ name = "llama-3-2-3b-instruct"
3
  version = "0.1.0"
4
  description = ""
5
  readme = "README.md"
6
- requires-python = ">=3.10"
7
  dependencies = [
8
  "accelerate>=1.9.0",
9
- "gradio>=5.39.0",
10
- "hf-transfer>=0.1.9",
11
  "spaces>=0.39.0",
12
- "torch==2.4.0",
13
  "transformers>=4.48.0",
14
  ]
15
 
@@ -35,7 +34,7 @@ ignore = [
35
  "EM101", # raw-string-in-exception
36
  "FBT001", # boolean-type-hint-positional-argument
37
  "FBT002", # boolean-default-value-positional-argument
38
- "PD901", # pandas-df-variable-name
39
  "PGH003", # blanket-type-ignore
40
  "PLR0913", # too-many-arguments
41
  "PLR0915", # too-many-statements
@@ -53,3 +52,12 @@ convention = "google"
53
 
54
  [tool.ruff.format]
55
  docstring-code-format = true
 
 
 
 
 
 
 
 
 
 
3
  version = "0.1.0"
4
  description = ""
5
  readme = "README.md"
6
+ requires-python = ">=3.12"
7
  dependencies = [
8
  "accelerate>=1.9.0",
9
+ "gradio>=6.8.0",
 
10
  "spaces>=0.39.0",
11
+ "torch==2.9.1",
12
  "transformers>=4.48.0",
13
  ]
14
 
 
34
  "EM101", # raw-string-in-exception
35
  "FBT001", # boolean-type-hint-positional-argument
36
  "FBT002", # boolean-default-value-positional-argument
37
+ "ISC001", # single-line-implicit-string-concatenation
38
  "PGH003", # blanket-type-ignore
39
  "PLR0913", # too-many-arguments
40
  "PLR0915", # too-many-statements
 
52
 
53
  [tool.ruff.format]
54
  docstring-code-format = true
55
+
56
+ [dependency-groups]
57
+ dev = [
58
+ "pre-commit>=4.5.1",
59
+ "ruff>=0.12.7",
60
+ ]
61
+ hf-spaces = [
62
+ "datasets",
63
+ ]
requirements.txt CHANGED
@@ -1,89 +1,116 @@
1
  # This file was autogenerated by uv via the following command:
2
- # uv pip compile pyproject.toml -o requirements.txt
3
- accelerate==1.9.0
4
- # via llama-3-2-3b-instruct (pyproject.toml)
5
- aiofiles==23.2.1
6
  # via gradio
 
 
 
 
 
 
 
 
 
 
7
  annotated-types==0.7.0
8
  # via pydantic
9
- anyio==4.8.0
10
  # via
11
  # gradio
12
  # httpx
13
  # starlette
14
- brotli==1.1.0
 
 
15
  # via gradio
16
- certifi==2024.12.14
 
 
17
  # via
18
  # httpcore
19
  # httpx
20
  # requests
21
- charset-normalizer==3.4.1
22
  # via requests
23
- click==8.1.8
24
  # via
25
  # typer
26
  # uvicorn
27
- exceptiongroup==1.2.2
28
- # via anyio
29
- fastapi==0.115.6
 
 
 
 
 
 
 
30
  # via gradio
31
- ffmpy==0.5.0
32
  # via gradio
33
- filelock==3.16.1
34
  # via
 
35
  # huggingface-hub
36
  # torch
37
- # transformers
38
- # triton
39
- fsspec==2024.12.0
40
  # via
 
 
 
 
 
41
  # gradio-client
42
  # huggingface-hub
43
  # torch
44
- gradio==5.39.0
45
  # via
46
- # llama-3-2-3b-instruct (pyproject.toml)
47
  # spaces
48
- gradio-client==1.11.0
49
  # via gradio
50
  groovy==0.1.2
51
  # via gradio
52
- h11==0.14.0
53
  # via
54
  # httpcore
55
  # uvicorn
56
- hf-transfer==0.1.9
57
- # via llama-3-2-3b-instruct (pyproject.toml)
58
- hf-xet==1.1.5
59
  # via huggingface-hub
60
- httpcore==1.0.7
61
  # via httpx
62
  httpx==0.28.1
63
  # via
 
64
  # gradio
65
  # gradio-client
 
66
  # safehttpx
67
  # spaces
68
- huggingface-hub==0.34.3
69
  # via
70
  # accelerate
 
71
  # gradio
72
  # gradio-client
73
  # tokenizers
74
  # transformers
75
- idna==3.10
76
  # via
77
  # anyio
78
  # httpx
79
  # requests
80
- jinja2==3.1.5
 
81
  # via
82
  # gradio
83
  # torch
84
- markdown-it-py==3.0.0
85
  # via rich
86
- markupsafe==2.1.5
87
  # via
88
  # gradio
89
  # jinja2
@@ -91,139 +118,166 @@ mdurl==0.1.2
91
  # via markdown-it-py
92
  mpmath==1.3.0
93
  # via sympy
94
- networkx==3.4.2
 
 
 
 
 
 
95
  # via torch
96
- numpy==2.2.1
97
  # via
98
  # accelerate
 
99
  # gradio
100
  # pandas
101
  # transformers
102
- nvidia-cublas-cu12==12.1.3.1
103
  # via
104
  # nvidia-cudnn-cu12
105
  # nvidia-cusolver-cu12
106
  # torch
107
- nvidia-cuda-cupti-cu12==12.1.105
 
 
108
  # via torch
109
- nvidia-cuda-nvrtc-cu12==12.1.105
110
  # via torch
111
- nvidia-cuda-runtime-cu12==12.1.105
112
  # via torch
113
- nvidia-cudnn-cu12==9.1.0.70
114
  # via torch
115
- nvidia-cufft-cu12==11.0.2.54
116
  # via torch
117
- nvidia-curand-cu12==10.3.2.106
118
  # via torch
119
- nvidia-cusolver-cu12==11.4.5.107
120
  # via torch
121
- nvidia-cusparse-cu12==12.1.0.106
122
  # via
123
  # nvidia-cusolver-cu12
124
  # torch
125
- nvidia-nccl-cu12==2.20.5
126
  # via torch
127
- nvidia-nvjitlink-cu12==12.6.85
 
 
128
  # via
 
129
  # nvidia-cusolver-cu12
130
  # nvidia-cusparse-cu12
131
- nvidia-nvtx-cu12==12.1.105
 
132
  # via torch
133
- orjson==3.10.14
 
 
134
  # via gradio
135
- packaging==24.2
136
  # via
137
  # accelerate
 
138
  # gradio
139
  # gradio-client
140
  # huggingface-hub
141
  # spaces
142
  # transformers
143
- pandas==2.2.3
144
- # via gradio
145
- pillow==11.1.0
 
 
146
  # via gradio
 
 
 
 
147
  psutil==5.9.8
148
  # via
149
  # accelerate
150
  # spaces
151
- pydantic==2.10.5
 
 
152
  # via
153
  # fastapi
154
  # gradio
155
  # spaces
156
- pydantic-core==2.27.2
157
  # via pydantic
158
  pydub==0.25.1
159
  # via gradio
160
- pygments==2.19.1
161
  # via rich
162
  python-dateutil==2.9.0.post0
163
  # via pandas
164
- python-multipart==0.0.20
165
  # via gradio
166
- pytz==2024.2
167
- # via pandas
168
- pyyaml==6.0.2
169
  # via
170
  # accelerate
 
171
  # gradio
172
  # huggingface-hub
173
  # transformers
174
- regex==2024.11.6
175
  # via transformers
176
- requests==2.32.3
177
  # via
178
- # huggingface-hub
179
  # spaces
180
- # transformers
181
- rich==13.9.4
182
  # via typer
183
- ruff==0.12.7
184
  # via gradio
185
- safehttpx==0.1.6
186
- # via gradio
187
- safetensors==0.5.2
188
  # via
189
  # accelerate
190
  # transformers
191
  semantic-version==2.10.0
192
  # via gradio
 
 
193
  shellingham==1.5.4
194
  # via typer
195
  six==1.17.0
196
  # via python-dateutil
197
- sniffio==1.3.1
198
- # via anyio
199
- spaces==0.39.0
200
- # via llama-3-2-3b-instruct (pyproject.toml)
201
- starlette==0.41.3
202
  # via
203
  # fastapi
204
  # gradio
205
- sympy==1.13.3
206
  # via torch
207
- tokenizers==0.21.0
208
  # via transformers
209
- tomlkit==0.13.2
210
  # via gradio
211
- torch==2.4.0
212
  # via
213
- # llama-3-2-3b-instruct (pyproject.toml)
214
  # accelerate
215
- tqdm==4.67.1
 
216
  # via
 
217
  # huggingface-hub
218
  # transformers
219
- transformers==4.48.0
220
- # via llama-3-2-3b-instruct (pyproject.toml)
221
- triton==3.0.0
222
  # via torch
223
- typer==0.15.1
224
- # via gradio
225
- typing-extensions==4.12.2
226
  # via
 
 
 
 
 
 
227
  # anyio
228
  # fastapi
229
  # gradio
@@ -231,16 +285,21 @@ typing-extensions==4.12.2
231
  # huggingface-hub
232
  # pydantic
233
  # pydantic-core
234
- # rich
235
  # spaces
 
236
  # torch
237
- # typer
238
- # uvicorn
239
- tzdata==2024.2
 
 
 
240
  # via pandas
241
- urllib3==2.3.0
242
  # via requests
243
- uvicorn==0.34.0
244
  # via gradio
245
- websockets==14.1
246
- # via gradio-client
 
 
 
1
  # This file was autogenerated by uv via the following command:
2
+ # uv export --no-hashes --no-dev --group hf-spaces --no-emit-package typer-slim -o requirements.txt
3
+ accelerate==1.12.0
4
+ # via llama-3-2-3b-instruct
5
+ aiofiles==24.1.0
6
  # via gradio
7
+ aiohappyeyeballs==2.6.1
8
+ # via aiohttp
9
+ aiohttp==3.13.3
10
+ # via fsspec
11
+ aiosignal==1.4.0
12
+ # via aiohttp
13
+ annotated-doc==0.0.4
14
+ # via
15
+ # fastapi
16
+ # typer
17
  annotated-types==0.7.0
18
  # via pydantic
19
+ anyio==4.12.1
20
  # via
21
  # gradio
22
  # httpx
23
  # starlette
24
+ attrs==25.4.0
25
+ # via aiohttp
26
+ audioop-lts==0.2.2 ; python_full_version >= '3.13'
27
  # via gradio
28
+ brotli==1.2.0
29
+ # via gradio
30
+ certifi==2026.2.25
31
  # via
32
  # httpcore
33
  # httpx
34
  # requests
35
+ charset-normalizer==3.4.4
36
  # via requests
37
+ click==8.3.1
38
  # via
39
  # typer
40
  # uvicorn
41
+ colorama==0.4.6 ; sys_platform == 'win32'
42
+ # via
43
+ # click
44
+ # tqdm
45
+ datasets==4.6.1
46
+ dill==0.4.0
47
+ # via
48
+ # datasets
49
+ # multiprocess
50
+ fastapi==0.135.1
51
  # via gradio
52
+ ffmpy==1.0.0
53
  # via gradio
54
+ filelock==3.25.0
55
  # via
56
+ # datasets
57
  # huggingface-hub
58
  # torch
59
+ frozenlist==1.8.0
 
 
60
  # via
61
+ # aiohttp
62
+ # aiosignal
63
+ fsspec==2026.2.0
64
+ # via
65
+ # datasets
66
  # gradio-client
67
  # huggingface-hub
68
  # torch
69
+ gradio==6.8.0
70
  # via
71
+ # llama-3-2-3b-instruct
72
  # spaces
73
+ gradio-client==2.2.0
74
  # via gradio
75
  groovy==0.1.2
76
  # via gradio
77
+ h11==0.16.0
78
  # via
79
  # httpcore
80
  # uvicorn
81
+ hf-xet==1.3.2 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
 
 
82
  # via huggingface-hub
83
+ httpcore==1.0.9
84
  # via httpx
85
  httpx==0.28.1
86
  # via
87
+ # datasets
88
  # gradio
89
  # gradio-client
90
+ # huggingface-hub
91
  # safehttpx
92
  # spaces
93
+ huggingface-hub==1.5.0
94
  # via
95
  # accelerate
96
+ # datasets
97
  # gradio
98
  # gradio-client
99
  # tokenizers
100
  # transformers
101
+ idna==3.11
102
  # via
103
  # anyio
104
  # httpx
105
  # requests
106
+ # yarl
107
+ jinja2==3.1.6
108
  # via
109
  # gradio
110
  # torch
111
+ markdown-it-py==4.0.0
112
  # via rich
113
+ markupsafe==3.0.3
114
  # via
115
  # gradio
116
  # jinja2
 
118
  # via markdown-it-py
119
  mpmath==1.3.0
120
  # via sympy
121
+ multidict==6.7.1
122
+ # via
123
+ # aiohttp
124
+ # yarl
125
+ multiprocess==0.70.18
126
+ # via datasets
127
+ networkx==3.6.1
128
  # via torch
129
+ numpy==2.4.2
130
  # via
131
  # accelerate
132
+ # datasets
133
  # gradio
134
  # pandas
135
  # transformers
136
+ nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
137
  # via
138
  # nvidia-cudnn-cu12
139
  # nvidia-cusolver-cu12
140
  # torch
141
+ nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
142
+ # via torch
143
+ nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
144
  # via torch
145
+ nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
146
  # via torch
147
+ nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
148
  # via torch
149
+ nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
150
  # via torch
151
+ nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
152
  # via torch
153
+ nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
154
  # via torch
155
+ nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
156
  # via torch
157
+ nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
158
  # via
159
  # nvidia-cusolver-cu12
160
  # torch
161
+ nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
162
  # via torch
163
+ nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
164
+ # via torch
165
+ nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
166
  # via
167
+ # nvidia-cufft-cu12
168
  # nvidia-cusolver-cu12
169
  # nvidia-cusparse-cu12
170
+ # torch
171
+ nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
172
  # via torch
173
+ nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
174
+ # via torch
175
+ orjson==3.11.7
176
  # via gradio
177
+ packaging==26.0
178
  # via
179
  # accelerate
180
+ # datasets
181
  # gradio
182
  # gradio-client
183
  # huggingface-hub
184
  # spaces
185
  # transformers
186
+ pandas==3.0.1
187
+ # via
188
+ # datasets
189
+ # gradio
190
+ pillow==12.1.1
191
  # via gradio
192
+ propcache==0.4.1
193
+ # via
194
+ # aiohttp
195
+ # yarl
196
  psutil==5.9.8
197
  # via
198
  # accelerate
199
  # spaces
200
+ pyarrow==23.0.1
201
+ # via datasets
202
+ pydantic==2.12.5
203
  # via
204
  # fastapi
205
  # gradio
206
  # spaces
207
+ pydantic-core==2.41.5
208
  # via pydantic
209
  pydub==0.25.1
210
  # via gradio
211
+ pygments==2.19.2
212
  # via rich
213
  python-dateutil==2.9.0.post0
214
  # via pandas
215
+ python-multipart==0.0.22
216
  # via gradio
217
+ pytz==2025.2
218
+ # via gradio
219
+ pyyaml==6.0.3
220
  # via
221
  # accelerate
222
+ # datasets
223
  # gradio
224
  # huggingface-hub
225
  # transformers
226
+ regex==2026.2.28
227
  # via transformers
228
+ requests==2.32.5
229
  # via
230
+ # datasets
231
  # spaces
232
+ rich==14.3.3
 
233
  # via typer
234
+ safehttpx==0.1.7
235
  # via gradio
236
+ safetensors==0.7.0
 
 
237
  # via
238
  # accelerate
239
  # transformers
240
  semantic-version==2.10.0
241
  # via gradio
242
+ setuptools==82.0.0
243
+ # via torch
244
  shellingham==1.5.4
245
  # via typer
246
  six==1.17.0
247
  # via python-dateutil
248
+ spaces==0.47.0
249
+ # via llama-3-2-3b-instruct
250
+ starlette==0.52.1
 
 
251
  # via
252
  # fastapi
253
  # gradio
254
+ sympy==1.14.0
255
  # via torch
256
+ tokenizers==0.22.2
257
  # via transformers
258
+ tomlkit==0.13.3
259
  # via gradio
260
+ torch==2.9.1
261
  # via
 
262
  # accelerate
263
+ # llama-3-2-3b-instruct
264
+ tqdm==4.67.3
265
  # via
266
+ # datasets
267
  # huggingface-hub
268
  # transformers
269
+ transformers==5.2.0
270
+ # via llama-3-2-3b-instruct
271
+ triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
272
  # via torch
273
+ typer==0.24.1
 
 
274
  # via
275
+ # gradio
276
+ # huggingface-hub
277
+ # typer-slim
278
+ typing-extensions==4.15.0
279
+ # via
280
+ # aiosignal
281
  # anyio
282
  # fastapi
283
  # gradio
 
285
  # huggingface-hub
286
  # pydantic
287
  # pydantic-core
 
288
  # spaces
289
+ # starlette
290
  # torch
291
+ # typing-inspection
292
+ typing-inspection==0.4.2
293
+ # via
294
+ # fastapi
295
+ # pydantic
296
+ tzdata==2025.3 ; sys_platform == 'emscripten' or sys_platform == 'win32'
297
  # via pandas
298
+ urllib3==2.6.3
299
  # via requests
300
+ uvicorn==0.41.0
301
  # via gradio
302
+ xxhash==3.6.0
303
+ # via datasets
304
+ yarl==1.23.0
305
+ # via aiohttp
uv.lock CHANGED
The diff for this file is too large to render. See raw diff