Spaces:

Borzyszkowski
/

AlpineLLM-App

Sleeping

App Files Files Community

Borzyszkowski committed on Oct 1, 2025

Commit

bb2fa48

1 Parent(s): 99a9f7b

ALP-1: web app for Alpine LLM'

Browse files

Files changed (8) hide show

.gitignore +211 -0
README.md +1 -1
app.py +84 -69
config_util.py +34 -0
demo_inference.py +55 -0
model/transformer_decoder.py +166 -0
requirements.txt +4 -0
tokenizer.py +21 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,211 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs
+.abstra/
+# Visual Studio Code
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+#  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
+#  you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Ruff stuff:
+.ruff_cache/
+# PyPI configuration file
+.pypirc
+# Cursor
+#  Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+#  exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+#  refer to https://docs.cursor.com/context/ignore-files
+.cursorignore
+.cursorindexingignore
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+# Other custom ignores
+best_model
+model-cache

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: AlpineLLM
 emoji: 💬
 colorFrom: yellow
 colorTo: purple

 ---
+title: AlpineLLM-App
 emoji: 💬
 colorFrom: yellow
 colorTo: purple

app.py CHANGED Viewed

@@ -1,70 +1,85 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
-if __name__ == "__main__":
-    demo.launch()

+""" A simple Gradio web app to interact with the AlpineLLM model """
 import gradio as gr
+import os
+import torch
+from huggingface_hub import hf_hub_download
+from demo_inference import AlpineLLMInference
+from config_util import Config
+def download_model(cfg):
+    """ Fetch the checkpoint file named in cfg from the Hugging Face Hub and return its local path """
+    # cfg.repo_id selects the Hub repository, cfg.model_name the file inside it;
+    # the download is stored under ./model-cache relative to the current working directory
+    hub_kwargs = {
+        "repo_id": cfg.repo_id,
+        "filename": cfg.model_name,
+        "cache_dir": "./model-cache",
+    }
+    return hf_hub_download(**hub_kwargs)
+def start_app(inference_engine=None):
+    """ Start the web app via Gradio with custom layout
+    Args:
+        inference_engine: object whose generate_text(prompt, max_new_tokens) returns the text to display.
+            When omitted, the module-level name `inference` is used, which keeps plain start_app() calls working.
+    Raises:
+        RuntimeError: if no engine is passed and no module-level `inference` is defined
+    """
+    if inference_engine is None:
+        # backward-compatible path: the script entry point binds the engine to a module global
+        inference_engine = globals().get("inference")
+    if inference_engine is None:
+        raise RuntimeError("start_app() needs an inference engine: pass one or define `inference` first")
+    with gr.Blocks(css="""#builtwithgradio, .footer, .svelte-1ipelgc {display: none !important;}""") as app:
+        gr.Markdown("<h1 style='text-align: center;'> AlpineLLM App</h1>")
+        gr.Markdown(
+            "<p style='text-align: center;'>"
+            "A domain-specific language model for alpine storytelling. <br>"
+            "Generate climbing stories, mountain impressions, and expedition-style text."
+            "</p>"
+        )
+        with gr.Row():
+            # left column: user inputs
+            with gr.Column(scale=1):
+                prompt = gr.Textbox(
+                    lines=8,
+                    label="Your alpine prompt...",
+                    placeholder="A dawn climb on the Matterhorn..."
+                )
+                max_tokens = gr.Slider(50, 1000, value=300, step=10, label="Max output tokens")
+                generate_btn = gr.Button("🚀 Generate")
+            # right column: read-only output, twice as wide as the input column
+            with gr.Column(scale=2):
+                output = gr.Textbox(lines=20, label="Generated Alpine Story", interactive=False)
+        # Bind button click to inference
+        generate_btn.click(
+            fn=inference_engine.generate_text,
+            inputs=[prompt, max_tokens],
+            outputs=output
+        )
+    # listen on all interfaces on port 7860
+    app.launch(server_name="0.0.0.0", server_port=7860)
+if __name__ == '__main__':
+    # run from the directory of this file so that relative paths resolve next to it
+    os.chdir(os.path.dirname(os.path.abspath(__file__)))
+    # runtime settings: CUDA device index, architecture name and checkpoint location on the Hub
+    cfg = Config({
+        "cuda_id": 0,
+        "model_type": "transformer",
+        "repo_id": "Borzyszkowski/AlpineLLM-model",
+        "model_name": "best_model",
+    })
+    # architecture hyperparameters handed to the model constructor
+    hyperparam_cfg = Config({
+        "embedding_dim": 384,
+        "num_heads": 6,
+        "num_layers": 6,
+        "dropout": 0.2,
+        "context_len": 256,
+        "lr": 3e-4,
+    })
+    # make sure the checkpoint is available locally and remember its path
+    cfg.load_weights_path = download_model(cfg)
+    # start_app reads the module-level name `inference`, so it has to be bound before the call
+    inference = AlpineLLMInference(cfg, hyperparam_cfg)
+    start_app()

config_util.py ADDED Viewed

	@@ -0,0 +1,34 @@

+""" Config utility """
+import os
+import yaml
+class Config(dict):
+    """ Dictionary-backed configuration with attribute access and optional .yaml overrides
+    Entries can be read, written and deleted either as items (cfg["lr"]) or as attributes (cfg.lr).
+    Args:
+        config: default settings; copied on construction, so the caller's object is never modified
+        user_cfg_path: optional path of a .yaml file whose entries overwrite the defaults
+    """
+    def __init__(self, config, user_cfg_path=None):
+        user_config = self.load_cfg(user_cfg_path) if user_cfg_path else {}
+        # merge on a copy: updating `config` in place would leak the overrides into the caller's dict
+        merged = dict(config)
+        merged.update(user_config)
+        super().__init__(merged)
+    def load_cfg(self, load_path):
+        """ Read a .yaml file and return its content, or an empty dict when the file is empty """
+        with open(load_path, "r", encoding="utf-8") as infile:
+            cfg = yaml.safe_load(infile)
+        return cfg if cfg is not None else {}
+    def write_cfg(self, write_path):
+        """ Write all entries except the key "default_cfg" to write_path as block-style YAML """
+        target_dir = os.path.dirname(write_path)
+        # a bare file name has no directory part, and os.makedirs("") would raise
+        if target_dir:
+            os.makedirs(target_dir, exist_ok=True)
+        dump_dict = {k: v for k, v in self.items() if k != "default_cfg"}
+        with open(write_path, "w", encoding="utf-8") as outfile:
+            yaml.safe_dump(dump_dict, outfile, default_flow_style=False)
+    def __getattr__(self, key):
+        # only called when normal attribute lookup fails; map missing keys to AttributeError
+        # so that hasattr() and getattr(obj, name, default) behave as usual
+        try:
+            return self[key]
+        except KeyError:
+            raise AttributeError(key) from None
+    # attribute assignment and deletion operate directly on the dictionary entries
+    __setattr__ = dict.__setitem__
+    __delattr__ = dict.__delitem__

demo_inference.py ADDED Viewed

	@@ -0,0 +1,55 @@

+""" Lightweight inference wrapper for the demo application """
+import logging
+import torch
+from tokenizer import CharacterLevelTokenizer
+from model.transformer_decoder import TransformerDecoder
+class AlpineLLMInference:
+    """ Builds the tokenizer and model once, restores the checkpoint and exposes text generation
+    Args:
+        cfg: settings object providing cuda_id, model_type and load_weights_path
+        hyperparam_cfg: architecture hyperparameters forwarded to the model constructor
+    """
+    def __init__(self, cfg, hyperparam_cfg):
+        self.cfg = cfg
+        self.hyperparam_cfg = hyperparam_cfg
+        self.device = torch.device(f"cuda:{self.cfg.cuda_id}" if torch.cuda.is_available() else "cpu")
+        self.tokenizer = CharacterLevelTokenizer()
+        self.model = self.select_model()
+        self.get_model(cfg.load_weights_path)
+        # modules start in training mode; switch to eval so that dropout is disabled during generation
+        self.model.eval()
+    def run_demo(self):
+        """ Run a simple demo loop to generate text based on user input """
+        while True:
+            prompt = input("Enter a prompt (or 'exit' to quit): ")
+            if prompt.lower() == 'exit':
+                logging.info("Exiting the demo.")
+                break
+            generated_text = self.generate_text(prompt)
+            logging.info(f"Generated Text:\n{generated_text}\n")
+    @torch.no_grad()
+    def generate_text(self, prompt, max_new_tokens=300):
+        """ Continue the prompt with sampled characters and return prompt plus continuation
+        Args:
+            prompt: text to continue; characters missing from the tokenizer vocabulary are dropped,
+                and an empty prompt is replaced by a single newline so the model has a start token
+            max_new_tokens: number of characters to sample; converted with int() because UI widgets
+                may deliver the value as a float
+        Returns:
+            the decoded string of the (cleaned) prompt followed by the generated characters
+        """
+        known_tokens = self.tokenizer.token_to_id
+        prompt = prompt or ""
+        cleaned = "".join(ch for ch in prompt if ch in known_tokens)
+        dropped = len(prompt) - len(cleaned)
+        if dropped:
+            logging.warning("Dropped %d prompt character(s) that are not in the tokenizer vocabulary", dropped)
+        if not cleaned:
+            # an empty id list would produce a (1, 0) float tensor that the embedding layer rejects
+            cleaned = "\n"
+        # tokenize input
+        input_ids = torch.tensor([self.tokenizer.encode(cleaned)], dtype=torch.long, device=self.device)
+        # generate tokens
+        output_ids = self.model.generate(input_ids, max_new_tokens=int(max_new_tokens))
+        # decode to string
+        return self.tokenizer.decode(output_ids[0].tolist())
+    def select_model(self):
+        """ Selects the neural network architecture based on the desired configuration
+        Raises:
+            ValueError: if cfg.model_type is not 'transformer'
+        """
+        vocab_size = len(self.tokenizer.vocab)
+        if self.cfg.model_type == 'transformer':
+            model = TransformerDecoder(vocab_size=vocab_size,
+                                       hyperparam_cfg=self.hyperparam_cfg,
+                                       device=self.device).to(self.device)
+        else:
+            raise ValueError(f"Model type '{self.cfg.model_type}' is not supported!")
+        model_name = model.__class__.__name__
+        logging.info(f'Selected model type: {self.cfg.model_type} with name: {model_name}')
+        return model
+    def get_model(self, model_path):
+        """ Loads weights of the model from the specified path """
+        # NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source
+        checkpoint = torch.load(model_path, map_location=self.device)
+        # the state dict is read from position/key 0 of the checkpoint object
+        load_result = self.model.load_state_dict(checkpoint[0], strict=False)
+        # strict=False tolerates mismatching keys, so report them instead of ignoring them silently
+        if load_result.missing_keys or load_result.unexpected_keys:
+            logging.warning("Checkpoint key mismatch - missing: %s, unexpected: %s",
+                            load_result.missing_keys, load_result.unexpected_keys)
+        logging.info(f'Restored model from: {model_path}')

model/transformer_decoder.py ADDED Viewed

	@@ -0,0 +1,166 @@

+""" Architecture of the TransformerDecoder """
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+class TransformerDecoder(nn.Module):
+    """ GPT-style decoder-only language model
+    Args:
+        vocab_size: number of distinct tokens; sizes the token embedding table and the output layer
+        hyperparam_cfg: object exposing embedding_dim, num_layers and context_len (and whatever TFBlock reads)
+        device: kept as self.device for callers; the constructor does not move the module
+    """
+    def __init__(self, vocab_size, hyperparam_cfg, device):
+        super(TransformerDecoder, self).__init__()
+        self.device = device
+        # model hyperparameters
+        embedding_dim = hyperparam_cfg.embedding_dim
+        num_layers = hyperparam_cfg.num_layers
+        self.context_len = hyperparam_cfg.context_len
+        # lookup table of tokens is used so that each token reads the logits for the next token
+        self.token_embedding_table = nn.Embedding(vocab_size, embedding_dim)
+        # pos embedding table adds information about the position of each token in the context
+        self.pos_embedding_table = nn.Embedding(self.context_len, embedding_dim)
+        # stack multiple transformer blocks to increase model capacity
+        self.tfblocks = nn.Sequential(*[TFBlock(hyperparam_cfg) for _ in range(num_layers)])
+        # final normalization and linear layer to produce logits for each token in the vocabulary
+        self.ln_f = nn.LayerNorm(embedding_dim)
+        self.lm_head = nn.Linear(embedding_dim, vocab_size)
+        # re-initialize the weights of every Linear and Embedding submodule (see _init_weights)
+        self.apply(self._init_weights)
+    def _init_weights(self, module):
+        """ Draw Linear/Embedding weights from N(0, 0.02^2) and zero the Linear biases """
+        if isinstance(module, nn.Linear):
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+            if module.bias is not None:
+                torch.nn.init.zeros_(module.bias)
+        elif isinstance(module, nn.Embedding):
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+    def forward(self, idx):
+        """
+        Compute the next-token logits for every position of the input
+        Args:
+            idx: (B,T) tensor of token indices; T must not exceed context_len, the size of the position table
+        Returns:
+            logits of shape (B,T,C) where: B=batch_size T=sequence length C=vocab_size
+        """
+        _, T = idx.shape
+        token_embd = self.token_embedding_table(idx)        # (B, T, embedding_dim)
+        # create the positions on the input's device so the model keeps working after being moved with .to(...)
+        positions = torch.arange(T, device=idx.device)      # tensor([0, 1, 2, ..., T-1])
+        pos_embd = self.pos_embedding_table(positions)      # (T, embedding_dim), broadcast over the batch
+        x = token_embd + pos_embd                           # (B, T, embedding_dim)
+        x = self.tfblocks(x)                                # (B, T, embedding_dim)
+        x = self.ln_f(x)                                    # (B, T, embedding_dim)
+        logits = self.lm_head(x)                            # (B, T, vocab_size)
+        return logits
+    def generate(self, idx, max_new_tokens):
+        """ Sample max_new_tokens tokens one at a time and return idx extended to (B, T+max_new_tokens)
+        The method neither changes the train/eval mode nor disables gradients; callers are responsible for both.
+        """
+        for _ in range(max_new_tokens):
+            # crop idx to the last context_len tokens
+            idx_context = idx[:, -self.context_len:]
+            # get the predictions
+            logits = self(idx_context) # (B,T,C)
+            # focus only on the last time step
+            logits = logits[:, -1, :] # (B, C)
+            # apply softmax to get probabilities
+            probs = F.softmax(logits, dim=-1) # (B, C)
+            # sample from the distribution to get the next token index
+            idx_next = torch.multinomial(probs, num_samples=1) # (B, 1)
+            # append sampled index to the running sequence
+            idx = torch.cat((idx, idx_next), dim=1) # (B, T+1)
+        return idx
+class TFBlock(nn.Module):
+    """ One transformer block: a self-attention sub-layer followed by a feed-forward sub-layer """
+    def __init__(self, hyperparam_cfg):
+        super().__init__()
+        # pull the settings this block needs out of the hyperparameter object
+        width = hyperparam_cfg.embedding_dim
+        n_heads = hyperparam_cfg.num_heads
+        window = hyperparam_cfg.context_len
+        drop_p = hyperparam_cfg.dropout
+        # every head works on an equal share of the embedding width (integer division)
+        self.sa_heads = MultiHeadAttention(
+            num_heads=n_heads,
+            head_size=width // n_heads,
+            embedding_dim=width,
+            context_len=window,
+            dropout=drop_p,
+        )
+        self.feed_forward = FeedForward(width, drop_p)
+        self.ln1 = nn.LayerNorm(width)
+        self.ln2 = nn.LayerNorm(width)
+    def forward(self, x):
+        # layer norm is applied to the input of each sub-layer; the sub-layer output is added back (residual)
+        attended = x + self.sa_heads(self.ln1(x))
+        return attended + self.feed_forward(self.ln2(attended))
+class MultiHeadAttention(nn.Module):
+    """ Runs several attention heads side by side and mixes their concatenated outputs """
+    def __init__(self, num_heads, head_size, embedding_dim, context_len, dropout):
+        super().__init__()
+        head_modules = []
+        for _ in range(num_heads):
+            head_modules.append(AttentionHead(embedding_dim, head_size, context_len, dropout))
+        self.heads = nn.ModuleList(head_modules)
+        # maps the concatenated head outputs back to embedding_dim so they can be added to the residual stream
+        self.projection = nn.Linear(num_heads * head_size, embedding_dim)
+        self.dropout = nn.Dropout(dropout)
+    def forward(self, x):
+        per_head = [head(x) for head in self.heads]   # num_heads tensors of (batch, T, head_size)
+        merged = torch.cat(per_head, dim=-1)          # (batch, T, num_heads * head_size)
+        return self.dropout(self.projection(merged))  # (batch, T, embedding_dim)
+class AttentionHead(nn.Module):
+    """ One head of self-attention """
+    def __init__(self, embedding_dim, head_size, context_len, dropout):
+        super(AttentionHead, self).__init__()
+        self.queries = nn.Linear(embedding_dim, head_size, bias=False)
+        self.keys = nn.Linear(embedding_dim, head_size, bias=False)
+        self.values = nn.Linear(embedding_dim, head_size, bias=False)
+        self.dropout = nn.Dropout(dropout)
+        # lower triangular matrix is used to mask out future tokens in the attention mechanism
+        self.register_buffer("mask", torch.tril(torch.ones(context_len, context_len)))
+    def forward(self, x):
+        B, T, C = x.shape    # (batch_size, context_len, embedding_dim)
+        q = self.queries(x)  # (batch, context_len, head_size)
+        k = self.keys(x)     # (batch, context_len, head_size)
+        v = self.values(x)   # (batch, context_len, head_size)
+        # compute attention matrix (key and query dot product)
+        weights = q @ k.transpose(-2, -1)  # (B,T,C) @ (B,C,T) -> (B,T,T)
+        # scale by sqrt(head_size) to prevent large dot products (stabilizes gradients)
+        weights = weights * C**-0.5
+        # mask replaces 0 with -inf and keeps 1 as is (ones are on and below diagonal; zeros above diagonal)
+        weights = weights.masked_fill(self.mask[:T, :T] == 0, float('-inf'))
+        # softmax along the last dimension to get probabilities per row
+        weights = F.softmax(weights, dim=-1)
+        weights = self.dropout(weights)
+        output = weights @ v  # matrix multiplication (T,T) @ (B,T,C) -> (B,T,C) = (batch, context_len, head_size)
+        return output
+class FeedForward(nn.Module):
+    """ Single feed-forward layer followed by a non-linearity """
+    def __init__(self, embedding_dim, dropout):
+        super(FeedForward, self).__init__()
+        # embedding_dim is multiplied by 4 to reflect the original transformer paper
+        self.net = nn.Sequential(
+            nn.Linear(embedding_dim, embedding_dim * 4),
+            nn.ReLU(),
+            nn.Linear(embedding_dim * 4, embedding_dim),
+            nn.Dropout(dropout)
+        )
+    def forward(self, x):
+        return self.net(x)

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio==5.47.2
+huggingface-hub==0.35.3
+pyyaml==6.0.2
+torch==2.4.1

tokenizer.py ADDED Viewed

	@@ -0,0 +1,21 @@

+""" Collection of tokenizers for text data. """
+import string
+class CharacterLevelTokenizer:
+    """ Maps single characters to integer ids and back over a fixed alphabet """
+    def __init__(self):
+        """ Build the sorted vocabulary (ASCII letters, digits, punctuation, space, newline) and both lookup tables """
+        alphabet = string.ascii_letters + string.digits + string.punctuation + " \n"
+        self.vocab = sorted(set(alphabet))
+        self.token_to_id = {}
+        self.id_to_token = {}
+        # the id of a character is its position in the sorted vocabulary
+        for idx, token in enumerate(self.vocab):
+            self.token_to_id[token] = idx
+            self.id_to_token[idx] = token
+    def encode(self, str_input):
+        """ encoder: string in, list of integer ids out; a character outside the vocabulary raises KeyError """
+        lookup = self.token_to_id
+        return [lookup[char] for char in str_input]
+    def decode(self, token_ids):
+        """ decoder: iterable of integer ids in, string out; an unknown id raises KeyError """
+        lookup = self.id_to_token
+        return ''.join(lookup[token_id] for token_id in token_ids)