hysts HF Staff committed on
Commit
b7a23a9
·
1 Parent(s): 2c78411
Files changed (7) hide show
  1. .pre-commit-config.yaml +4 -4
  2. .python-version +1 -1
  3. README.md +2 -1
  4. app.py +69 -31
  5. pyproject.toml +13 -5
  6. requirements.txt +149 -90
  7. uv.lock +0 -0
.pre-commit-config.yaml CHANGED
@@ -1,6 +1,6 @@
1
  repos:
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
- rev: v5.0.0
4
  hooks:
5
  - id: check-executables-have-shebangs
6
  - id: check-json
@@ -14,13 +14,13 @@ repos:
14
  - id: requirements-txt-fixer
15
  - id: trailing-whitespace
16
  - repo: https://github.com/astral-sh/ruff-pre-commit
17
- rev: v0.8.6
18
  hooks:
19
- - id: ruff
20
  args: ["--fix"]
21
  - id: ruff-format
22
  - repo: https://github.com/pre-commit/mirrors-mypy
23
- rev: v1.14.1
24
  hooks:
25
  - id: mypy
26
  args: ["--ignore-missing-imports"]
 
1
  repos:
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v6.0.0
4
  hooks:
5
  - id: check-executables-have-shebangs
6
  - id: check-json
 
14
  - id: requirements-txt-fixer
15
  - id: trailing-whitespace
16
  - repo: https://github.com/astral-sh/ruff-pre-commit
17
+ rev: v0.15.4
18
  hooks:
19
+ - id: ruff-check
20
  args: ["--fix"]
21
  - id: ruff-format
22
  - repo: https://github.com/pre-commit/mirrors-mypy
23
+ rev: v1.19.1
24
  hooks:
25
  - id: mypy
26
  args: ["--ignore-missing-imports"]
.python-version CHANGED
@@ -1 +1 @@
1
- 3.10
 
1
+ 3.12
README.md CHANGED
@@ -4,7 +4,8 @@ emoji: 😻
4
  colorFrom: indigo
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.39.0
 
8
  app_file: app.py
9
  pinned: false
10
  short_description: Chatbot
 
4
  colorFrom: indigo
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 6.8.0
8
+ python_version: "3.12.12"
9
  app_file: app.py
10
  pinned: false
11
  short_description: Chatbot
app.py CHANGED
@@ -15,16 +15,14 @@ This is a demo of [`meta-llama/Llama-3.2-3B-Instruct`](https://huggingface.co/me
15
  For more details, please check [our post](https://huggingface.co/blog/llama32).
16
  """
17
 
18
- MAX_MAX_NEW_TOKENS = 2048
19
  DEFAULT_MAX_NEW_TOKENS = 1024
20
- MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
21
 
22
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
23
-
24
- model_id = "meta-llama/Llama-3.2-3B-Instruct"
25
- tokenizer = AutoTokenizer.from_pretrained(model_id)
26
  model = AutoModelForCausalLM.from_pretrained(
27
- model_id,
28
  device_map="auto",
29
  torch_dtype=torch.bfloat16,
30
  )
@@ -32,6 +30,51 @@ model.eval()
32
 
33
 
34
  @spaces.GPU(duration=90)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def generate(
36
  message: str,
37
  chat_history: list[dict],
@@ -41,33 +84,31 @@ def generate(
41
  top_k: int = 50,
42
  repetition_penalty: float = 1.2,
43
  ) -> Iterator[str]:
 
 
 
44
  conversation = [*chat_history, {"role": "user", "content": message}]
45
 
46
- input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
47
- if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
48
- input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
49
- gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
50
- input_ids = input_ids.to(model.device)
 
 
51
 
52
- streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
53
- generate_kwargs = dict(
54
- {"input_ids": input_ids},
55
- streamer=streamer,
 
 
56
  max_new_tokens=max_new_tokens,
57
- do_sample=True,
58
  top_p=top_p,
59
  top_k=top_k,
60
- temperature=temperature,
61
- num_beams=1,
62
  repetition_penalty=repetition_penalty,
63
  )
64
- t = Thread(target=model.generate, kwargs=generate_kwargs)
65
- t.start()
66
-
67
- outputs = []
68
- for text in streamer:
69
- outputs.append(text)
70
- yield "".join(outputs)
71
 
72
 
73
  demo = gr.ChatInterface(
@@ -76,7 +117,7 @@ demo = gr.ChatInterface(
76
  gr.Slider(
77
  label="Max new tokens",
78
  minimum=1,
79
- maximum=MAX_MAX_NEW_TOKENS,
80
  step=1,
81
  value=DEFAULT_MAX_NEW_TOKENS,
82
  ),
@@ -109,7 +150,6 @@ demo = gr.ChatInterface(
109
  value=1.2,
110
  ),
111
  ],
112
- stop_btn=None,
113
  examples=[
114
  ["Hello there! How are you doing?"],
115
  ["Can you explain briefly to me what is the Python programming language?"],
@@ -118,12 +158,10 @@ demo = gr.ChatInterface(
118
  ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
119
  ],
120
  cache_examples=False,
121
- type="messages",
122
  description=DESCRIPTION,
123
- css_paths="style.css",
124
  fill_height=True,
125
  )
126
 
127
 
128
  if __name__ == "__main__":
129
- demo.launch()
 
15
  For more details, please check [our post](https://huggingface.co/blog/llama32).
16
  """
17
 
18
+ MAX_NEW_TOKENS_LIMIT = 2048
19
  DEFAULT_MAX_NEW_TOKENS = 1024
20
+ MAX_INPUT_TOKENS = int(os.getenv("MAX_INPUT_TOKENS", "4096"))
21
 
22
+ MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"
23
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 
24
  model = AutoModelForCausalLM.from_pretrained(
25
+ MODEL_ID,
26
  device_map="auto",
27
  torch_dtype=torch.bfloat16,
28
  )
 
30
 
31
 
32
  @spaces.GPU(duration=90)
33
+ def _generate_on_gpu(
34
+ input_ids: torch.Tensor,
35
+ max_new_tokens: int,
36
+ temperature: float,
37
+ top_p: float,
38
+ top_k: int,
39
+ repetition_penalty: float,
40
+ ) -> Iterator[str]:
41
+ input_ids = input_ids.to(model.device)
42
+
43
+ streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
44
+ generate_kwargs = {
45
+ "input_ids": input_ids,
46
+ "streamer": streamer,
47
+ "max_new_tokens": max_new_tokens,
48
+ "do_sample": True,
49
+ "top_p": top_p,
50
+ "top_k": top_k,
51
+ "temperature": temperature,
52
+ "num_beams": 1,
53
+ "repetition_penalty": repetition_penalty,
54
+ }
55
+
56
+ exception_holder: list[Exception] = []
57
+
58
+ def _generate() -> None:
59
+ try:
60
+ model.generate(**generate_kwargs)
61
+ except Exception as e: # noqa: BLE001
62
+ exception_holder.append(e)
63
+
64
+ thread = Thread(target=_generate)
65
+ thread.start()
66
+
67
+ chunks: list[str] = []
68
+ for text in streamer:
69
+ chunks.append(text)
70
+ yield "".join(chunks)
71
+
72
+ thread.join()
73
+ if exception_holder:
74
+ msg = f"Generation failed: {exception_holder[0]}"
75
+ raise gr.Error(msg)
76
+
77
+
78
  def generate(
79
  message: str,
80
  chat_history: list[dict],
 
84
  top_k: int = 50,
85
  repetition_penalty: float = 1.2,
86
  ) -> Iterator[str]:
87
+ if not message or not message.strip():
88
+ raise gr.Error("Please enter a message.")
89
+
90
  conversation = [*chat_history, {"role": "user", "content": message}]
91
 
92
+ input_ids = tokenizer.apply_chat_template(
93
+ conversation, add_generation_prompt=True, return_tensors="pt", return_dict=True
94
+ ).input_ids
95
+ n_input_tokens = input_ids.shape[1]
96
+ if n_input_tokens > MAX_INPUT_TOKENS:
97
+ msg = f"Input too long ({n_input_tokens} tokens). Maximum is {MAX_INPUT_TOKENS} tokens."
98
+ raise gr.Error(msg)
99
 
100
+ max_new_tokens = min(max_new_tokens, MAX_INPUT_TOKENS - n_input_tokens)
101
+ if max_new_tokens <= 0:
102
+ raise gr.Error("Input uses the entire context window. No room to generate new tokens.")
103
+
104
+ yield from _generate_on_gpu(
105
+ input_ids=input_ids,
106
  max_new_tokens=max_new_tokens,
107
+ temperature=temperature,
108
  top_p=top_p,
109
  top_k=top_k,
 
 
110
  repetition_penalty=repetition_penalty,
111
  )
 
 
 
 
 
 
 
112
 
113
 
114
  demo = gr.ChatInterface(
 
117
  gr.Slider(
118
  label="Max new tokens",
119
  minimum=1,
120
+ maximum=MAX_NEW_TOKENS_LIMIT,
121
  step=1,
122
  value=DEFAULT_MAX_NEW_TOKENS,
123
  ),
 
150
  value=1.2,
151
  ),
152
  ],
 
153
  examples=[
154
  ["Hello there! How are you doing?"],
155
  ["Can you explain briefly to me what is the Python programming language?"],
 
158
  ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
159
  ],
160
  cache_examples=False,
 
161
  description=DESCRIPTION,
 
162
  fill_height=True,
163
  )
164
 
165
 
166
  if __name__ == "__main__":
167
+ demo.launch(css_paths="style.css")
pyproject.toml CHANGED
@@ -3,13 +3,12 @@ name = "llama-3-2-3b-instruct"
3
  version = "0.1.0"
4
  description = ""
5
  readme = "README.md"
6
- requires-python = ">=3.10"
7
  dependencies = [
8
  "accelerate>=1.9.0",
9
- "gradio>=5.39.0",
10
- "hf-transfer>=0.1.9",
11
  "spaces>=0.39.0",
12
- "torch==2.4.0",
13
  "transformers>=4.48.0",
14
  ]
15
 
@@ -35,7 +34,7 @@ ignore = [
35
  "EM101", # raw-string-in-exception
36
  "FBT001", # boolean-type-hint-positional-argument
37
  "FBT002", # boolean-default-value-positional-argument
38
- "PD901", # pandas-df-variable-name
39
  "PGH003", # blanket-type-ignore
40
  "PLR0913", # too-many-arguments
41
  "PLR0915", # too-many-statements
@@ -53,3 +52,12 @@ convention = "google"
53
 
54
  [tool.ruff.format]
55
  docstring-code-format = true
 
 
 
 
 
 
 
 
 
 
3
  version = "0.1.0"
4
  description = ""
5
  readme = "README.md"
6
+ requires-python = ">=3.12"
7
  dependencies = [
8
  "accelerate>=1.9.0",
9
+ "gradio>=6.8.0",
 
10
  "spaces>=0.39.0",
11
+ "torch==2.9.1",
12
  "transformers>=4.48.0",
13
  ]
14
 
 
34
  "EM101", # raw-string-in-exception
35
  "FBT001", # boolean-type-hint-positional-argument
36
  "FBT002", # boolean-default-value-positional-argument
37
+ "ISC001", # single-line-implicit-string-concatenation
38
  "PGH003", # blanket-type-ignore
39
  "PLR0913", # too-many-arguments
40
  "PLR0915", # too-many-statements
 
52
 
53
  [tool.ruff.format]
54
  docstring-code-format = true
55
+
56
+ [dependency-groups]
57
+ dev = [
58
+ "pre-commit>=4.5.1",
59
+ "ruff>=0.12.7",
60
+ ]
61
+ hf-spaces = [
62
+ "datasets",
63
+ ]
requirements.txt CHANGED
@@ -1,89 +1,116 @@
1
  # This file was autogenerated by uv via the following command:
2
- # uv pip compile pyproject.toml -o requirements.txt
3
- accelerate==1.9.0
4
- # via llama-3-2-3b-instruct (pyproject.toml)
5
- aiofiles==23.2.1
6
  # via gradio
 
 
 
 
 
 
 
 
 
 
7
  annotated-types==0.7.0
8
  # via pydantic
9
- anyio==4.8.0
10
  # via
11
  # gradio
12
  # httpx
13
  # starlette
14
- brotli==1.1.0
 
 
15
  # via gradio
16
- certifi==2024.12.14
 
 
17
  # via
18
  # httpcore
19
  # httpx
20
  # requests
21
- charset-normalizer==3.4.1
22
  # via requests
23
- click==8.1.8
24
  # via
25
  # typer
26
  # uvicorn
27
- exceptiongroup==1.2.2
28
- # via anyio
29
- fastapi==0.115.6
 
 
 
 
 
 
 
30
  # via gradio
31
- ffmpy==0.5.0
32
  # via gradio
33
- filelock==3.16.1
34
  # via
 
35
  # huggingface-hub
36
  # torch
37
- # transformers
38
- # triton
39
- fsspec==2024.12.0
40
  # via
 
 
 
 
 
41
  # gradio-client
42
  # huggingface-hub
43
  # torch
44
- gradio==5.39.0
45
  # via
46
- # llama-3-2-3b-instruct (pyproject.toml)
47
  # spaces
48
- gradio-client==1.11.0
49
  # via gradio
50
  groovy==0.1.2
51
  # via gradio
52
- h11==0.14.0
53
  # via
54
  # httpcore
55
  # uvicorn
56
- hf-transfer==0.1.9
57
- # via llama-3-2-3b-instruct (pyproject.toml)
58
- hf-xet==1.1.5
59
  # via huggingface-hub
60
- httpcore==1.0.7
61
  # via httpx
62
  httpx==0.28.1
63
  # via
 
64
  # gradio
65
  # gradio-client
 
66
  # safehttpx
67
  # spaces
68
- huggingface-hub==0.34.3
69
  # via
70
  # accelerate
 
71
  # gradio
72
  # gradio-client
73
  # tokenizers
74
  # transformers
75
- idna==3.10
76
  # via
77
  # anyio
78
  # httpx
79
  # requests
80
- jinja2==3.1.5
 
81
  # via
82
  # gradio
83
  # torch
84
- markdown-it-py==3.0.0
85
  # via rich
86
- markupsafe==2.1.5
87
  # via
88
  # gradio
89
  # jinja2
@@ -91,139 +118,166 @@ mdurl==0.1.2
91
  # via markdown-it-py
92
  mpmath==1.3.0
93
  # via sympy
94
- networkx==3.4.2
 
 
 
 
 
 
95
  # via torch
96
- numpy==2.2.1
97
  # via
98
  # accelerate
 
99
  # gradio
100
  # pandas
101
  # transformers
102
- nvidia-cublas-cu12==12.1.3.1
103
  # via
104
  # nvidia-cudnn-cu12
105
  # nvidia-cusolver-cu12
106
  # torch
107
- nvidia-cuda-cupti-cu12==12.1.105
 
 
108
  # via torch
109
- nvidia-cuda-nvrtc-cu12==12.1.105
110
  # via torch
111
- nvidia-cuda-runtime-cu12==12.1.105
112
  # via torch
113
- nvidia-cudnn-cu12==9.1.0.70
114
  # via torch
115
- nvidia-cufft-cu12==11.0.2.54
116
  # via torch
117
- nvidia-curand-cu12==10.3.2.106
118
  # via torch
119
- nvidia-cusolver-cu12==11.4.5.107
120
  # via torch
121
- nvidia-cusparse-cu12==12.1.0.106
122
  # via
123
  # nvidia-cusolver-cu12
124
  # torch
125
- nvidia-nccl-cu12==2.20.5
126
  # via torch
127
- nvidia-nvjitlink-cu12==12.6.85
 
 
128
  # via
 
129
  # nvidia-cusolver-cu12
130
  # nvidia-cusparse-cu12
131
- nvidia-nvtx-cu12==12.1.105
 
132
  # via torch
133
- orjson==3.10.14
 
 
134
  # via gradio
135
- packaging==24.2
136
  # via
137
  # accelerate
 
138
  # gradio
139
  # gradio-client
140
  # huggingface-hub
141
  # spaces
142
  # transformers
143
- pandas==2.2.3
144
- # via gradio
145
- pillow==11.1.0
 
 
146
  # via gradio
 
 
 
 
147
  psutil==5.9.8
148
  # via
149
  # accelerate
150
  # spaces
151
- pydantic==2.10.5
 
 
152
  # via
153
  # fastapi
154
  # gradio
155
  # spaces
156
- pydantic-core==2.27.2
157
  # via pydantic
158
  pydub==0.25.1
159
  # via gradio
160
- pygments==2.19.1
161
  # via rich
162
  python-dateutil==2.9.0.post0
163
  # via pandas
164
- python-multipart==0.0.20
165
  # via gradio
166
- pytz==2024.2
167
- # via pandas
168
- pyyaml==6.0.2
169
  # via
170
  # accelerate
 
171
  # gradio
172
  # huggingface-hub
173
  # transformers
174
- regex==2024.11.6
175
  # via transformers
176
- requests==2.32.3
177
  # via
178
- # huggingface-hub
179
  # spaces
180
- # transformers
181
- rich==13.9.4
182
  # via typer
183
- ruff==0.12.7
184
  # via gradio
185
- safehttpx==0.1.6
186
- # via gradio
187
- safetensors==0.5.2
188
  # via
189
  # accelerate
190
  # transformers
191
  semantic-version==2.10.0
192
  # via gradio
 
 
193
  shellingham==1.5.4
194
  # via typer
195
  six==1.17.0
196
  # via python-dateutil
197
- sniffio==1.3.1
198
- # via anyio
199
- spaces==0.39.0
200
- # via llama-3-2-3b-instruct (pyproject.toml)
201
- starlette==0.41.3
202
  # via
203
  # fastapi
204
  # gradio
205
- sympy==1.13.3
206
  # via torch
207
- tokenizers==0.21.0
208
  # via transformers
209
- tomlkit==0.13.2
210
  # via gradio
211
- torch==2.4.0
212
  # via
213
- # llama-3-2-3b-instruct (pyproject.toml)
214
  # accelerate
215
- tqdm==4.67.1
 
216
  # via
 
217
  # huggingface-hub
218
  # transformers
219
- transformers==4.48.0
220
- # via llama-3-2-3b-instruct (pyproject.toml)
221
- triton==3.0.0
222
  # via torch
223
- typer==0.15.1
224
- # via gradio
225
- typing-extensions==4.12.2
226
  # via
 
 
 
 
 
 
227
  # anyio
228
  # fastapi
229
  # gradio
@@ -231,16 +285,21 @@ typing-extensions==4.12.2
231
  # huggingface-hub
232
  # pydantic
233
  # pydantic-core
234
- # rich
235
  # spaces
 
236
  # torch
237
- # typer
238
- # uvicorn
239
- tzdata==2024.2
 
 
 
240
  # via pandas
241
- urllib3==2.3.0
242
  # via requests
243
- uvicorn==0.34.0
244
  # via gradio
245
- websockets==14.1
246
- # via gradio-client
 
 
 
1
  # This file was autogenerated by uv via the following command:
2
+ # uv export --no-hashes --no-dev --group hf-spaces --no-emit-package typer-slim -o requirements.txt
3
+ accelerate==1.12.0
4
+ # via llama-3-2-3b-instruct
5
+ aiofiles==24.1.0
6
  # via gradio
7
+ aiohappyeyeballs==2.6.1
8
+ # via aiohttp
9
+ aiohttp==3.13.3
10
+ # via fsspec
11
+ aiosignal==1.4.0
12
+ # via aiohttp
13
+ annotated-doc==0.0.4
14
+ # via
15
+ # fastapi
16
+ # typer
17
  annotated-types==0.7.0
18
  # via pydantic
19
+ anyio==4.12.1
20
  # via
21
  # gradio
22
  # httpx
23
  # starlette
24
+ attrs==25.4.0
25
+ # via aiohttp
26
+ audioop-lts==0.2.2 ; python_full_version >= '3.13'
27
  # via gradio
28
+ brotli==1.2.0
29
+ # via gradio
30
+ certifi==2026.2.25
31
  # via
32
  # httpcore
33
  # httpx
34
  # requests
35
+ charset-normalizer==3.4.4
36
  # via requests
37
+ click==8.3.1
38
  # via
39
  # typer
40
  # uvicorn
41
+ colorama==0.4.6 ; sys_platform == 'win32'
42
+ # via
43
+ # click
44
+ # tqdm
45
+ datasets==4.6.1
46
+ dill==0.4.0
47
+ # via
48
+ # datasets
49
+ # multiprocess
50
+ fastapi==0.135.1
51
  # via gradio
52
+ ffmpy==1.0.0
53
  # via gradio
54
+ filelock==3.25.0
55
  # via
56
+ # datasets
57
  # huggingface-hub
58
  # torch
59
+ frozenlist==1.8.0
 
 
60
  # via
61
+ # aiohttp
62
+ # aiosignal
63
+ fsspec==2026.2.0
64
+ # via
65
+ # datasets
66
  # gradio-client
67
  # huggingface-hub
68
  # torch
69
+ gradio==6.8.0
70
  # via
71
+ # llama-3-2-3b-instruct
72
  # spaces
73
+ gradio-client==2.2.0
74
  # via gradio
75
  groovy==0.1.2
76
  # via gradio
77
+ h11==0.16.0
78
  # via
79
  # httpcore
80
  # uvicorn
81
+ hf-xet==1.3.2 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
 
 
82
  # via huggingface-hub
83
+ httpcore==1.0.9
84
  # via httpx
85
  httpx==0.28.1
86
  # via
87
+ # datasets
88
  # gradio
89
  # gradio-client
90
+ # huggingface-hub
91
  # safehttpx
92
  # spaces
93
+ huggingface-hub==1.5.0
94
  # via
95
  # accelerate
96
+ # datasets
97
  # gradio
98
  # gradio-client
99
  # tokenizers
100
  # transformers
101
+ idna==3.11
102
  # via
103
  # anyio
104
  # httpx
105
  # requests
106
+ # yarl
107
+ jinja2==3.1.6
108
  # via
109
  # gradio
110
  # torch
111
+ markdown-it-py==4.0.0
112
  # via rich
113
+ markupsafe==3.0.3
114
  # via
115
  # gradio
116
  # jinja2
 
118
  # via markdown-it-py
119
  mpmath==1.3.0
120
  # via sympy
121
+ multidict==6.7.1
122
+ # via
123
+ # aiohttp
124
+ # yarl
125
+ multiprocess==0.70.18
126
+ # via datasets
127
+ networkx==3.6.1
128
  # via torch
129
+ numpy==2.4.2
130
  # via
131
  # accelerate
132
+ # datasets
133
  # gradio
134
  # pandas
135
  # transformers
136
+ nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
137
  # via
138
  # nvidia-cudnn-cu12
139
  # nvidia-cusolver-cu12
140
  # torch
141
+ nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
142
+ # via torch
143
+ nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
144
  # via torch
145
+ nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
146
  # via torch
147
+ nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
148
  # via torch
149
+ nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
150
  # via torch
151
+ nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
152
  # via torch
153
+ nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
154
  # via torch
155
+ nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
156
  # via torch
157
+ nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
158
  # via
159
  # nvidia-cusolver-cu12
160
  # torch
161
+ nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
162
  # via torch
163
+ nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
164
+ # via torch
165
+ nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
166
  # via
167
+ # nvidia-cufft-cu12
168
  # nvidia-cusolver-cu12
169
  # nvidia-cusparse-cu12
170
+ # torch
171
+ nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
172
  # via torch
173
+ nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
174
+ # via torch
175
+ orjson==3.11.7
176
  # via gradio
177
+ packaging==26.0
178
  # via
179
  # accelerate
180
+ # datasets
181
  # gradio
182
  # gradio-client
183
  # huggingface-hub
184
  # spaces
185
  # transformers
186
+ pandas==3.0.1
187
+ # via
188
+ # datasets
189
+ # gradio
190
+ pillow==12.1.1
191
  # via gradio
192
+ propcache==0.4.1
193
+ # via
194
+ # aiohttp
195
+ # yarl
196
  psutil==5.9.8
197
  # via
198
  # accelerate
199
  # spaces
200
+ pyarrow==23.0.1
201
+ # via datasets
202
+ pydantic==2.12.5
203
  # via
204
  # fastapi
205
  # gradio
206
  # spaces
207
+ pydantic-core==2.41.5
208
  # via pydantic
209
  pydub==0.25.1
210
  # via gradio
211
+ pygments==2.19.2
212
  # via rich
213
  python-dateutil==2.9.0.post0
214
  # via pandas
215
+ python-multipart==0.0.22
216
  # via gradio
217
+ pytz==2025.2
218
+ # via gradio
219
+ pyyaml==6.0.3
220
  # via
221
  # accelerate
222
+ # datasets
223
  # gradio
224
  # huggingface-hub
225
  # transformers
226
+ regex==2026.2.28
227
  # via transformers
228
+ requests==2.32.5
229
  # via
230
+ # datasets
231
  # spaces
232
+ rich==14.3.3
 
233
  # via typer
234
+ safehttpx==0.1.7
235
  # via gradio
236
+ safetensors==0.7.0
 
 
237
  # via
238
  # accelerate
239
  # transformers
240
  semantic-version==2.10.0
241
  # via gradio
242
+ setuptools==82.0.0
243
+ # via torch
244
  shellingham==1.5.4
245
  # via typer
246
  six==1.17.0
247
  # via python-dateutil
248
+ spaces==0.47.0
249
+ # via llama-3-2-3b-instruct
250
+ starlette==0.52.1
 
 
251
  # via
252
  # fastapi
253
  # gradio
254
+ sympy==1.14.0
255
  # via torch
256
+ tokenizers==0.22.2
257
  # via transformers
258
+ tomlkit==0.13.3
259
  # via gradio
260
+ torch==2.9.1
261
  # via
 
262
  # accelerate
263
+ # llama-3-2-3b-instruct
264
+ tqdm==4.67.3
265
  # via
266
+ # datasets
267
  # huggingface-hub
268
  # transformers
269
+ transformers==5.2.0
270
+ # via llama-3-2-3b-instruct
271
+ triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
272
  # via torch
273
+ typer==0.24.1
 
 
274
  # via
275
+ # gradio
276
+ # huggingface-hub
277
+ # typer-slim
278
+ typing-extensions==4.15.0
279
+ # via
280
+ # aiosignal
281
  # anyio
282
  # fastapi
283
  # gradio
 
285
  # huggingface-hub
286
  # pydantic
287
  # pydantic-core
 
288
  # spaces
289
+ # starlette
290
  # torch
291
+ # typing-inspection
292
+ typing-inspection==0.4.2
293
+ # via
294
+ # fastapi
295
+ # pydantic
296
+ tzdata==2025.3 ; sys_platform == 'emscripten' or sys_platform == 'win32'
297
  # via pandas
298
+ urllib3==2.6.3
299
  # via requests
300
+ uvicorn==0.41.0
301
  # via gradio
302
+ xxhash==3.6.0
303
+ # via datasets
304
+ yarl==1.23.0
305
+ # via aiohttp
uv.lock CHANGED
The diff for this file is too large to render. See raw diff