Upload 4 files
- Dockerfile +27 -0
- main.py +139 -0
- modeling_rx_codex_v3.py +104 -0
- requirements.txt +7 -0
Dockerfile
ADDED
@@ -0,0 +1,27 @@
+# Use an official Python base image
+FROM python:3.11-slim
+
+# Set the working directory inside the container
+WORKDIR /code
+
+# Set environment variable for Hugging Face cache
+ENV HF_HOME /code/.cache
+
+# Create the cache directory and make it fully writable.
+RUN mkdir -p /code/.cache && chmod -R 777 /code/.cache
+
+# Copy and install requirements
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+# --- CRITICAL: Copy our custom model's code ---
+COPY ./modeling_rx_codex_v3.py /code/modeling_rx_codex_v3.py
+
+# Copy application code
+COPY ./main.py /code/app.py
+
+# Expose the port Hugging Face Spaces expects
+EXPOSE 7860
+
+# Run the application
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
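
A quick way to sanity-check this image locally (outside the Space) is to build and run it, e.g. `docker build -t rx-codex-api .` followed by `docker run -p 7860:7860 rx-codex-api`, then hit the root endpoint. The image tag and the check below are illustrative assumptions, not part of this commit; a minimal stdlib sketch:

# check_api.py - hypothetical smoke test; assumes the container is
# already running and listening on localhost:7860
import json
import urllib.request

# The root endpoint defined in main.py reports whether the model loaded.
with urllib.request.urlopen("http://localhost:7860/") as resp:
    print(json.load(resp))  # expect {"message": ..., "model_status": "loaded"}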
main.py
ADDED
@@ -0,0 +1,139 @@
+# main.py (Corrected)
+
+import logging
+from contextlib import asynccontextmanager
+import torch
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import AutoTokenizer, GPT2Config
+from huggingface_hub import hf_hub_download
+
+# --- IMPORTANT: We must import our custom model class directly ---
+# This assumes 'modeling_rx_codex_v3.py' is in the same directory
+from modeling_rx_codex_v3 import Rx_Codex_V3_Custom_Model_Class
+
+# --- Configuration ---
+HF_REPO_ID = "rxmha125/Rx_Codex_V1_Tiny_V3"
+MODEL_LOAD_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+
+# --- Logging Setup ---
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# --- Global variables ---
+model = None
+tokenizer = None
+
+# --- Application Lifespan (Model Loading) ---
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    global model, tokenizer
+    logger.info(f"API Startup: Explicitly loading model '{HF_REPO_ID}' to device '{MODEL_LOAD_DEVICE}'...")
+
+    try:
+        # Load tokenizer as before
+        tokenizer = AutoTokenizer.from_pretrained(HF_REPO_ID)
+        logger.info("✅ Tokenizer loaded successfully.")
+
+        # --- EXPLICIT MODEL LOADING ---
+        # 1. Load the configuration file
+        config = GPT2Config.from_pretrained(HF_REPO_ID)
+        logger.info("✅ Config loaded successfully.")
+
+        # 2. Instantiate our custom model with the config
+        model = Rx_Codex_V3_Custom_Model_Class(config)
+        logger.info("✅ Custom model architecture instantiated.")
+
+        # 3. Download the model weights file specifically
+        weights_path = hf_hub_download(repo_id=HF_REPO_ID, filename="pytorch_model.bin")
+        logger.info("✅ Model weights downloaded successfully.")
+
+        # 4. Load the state dictionary into our custom model
+        state_dict = torch.load(weights_path, map_location=MODEL_LOAD_DEVICE)
+        model.load_state_dict(state_dict)
+        logger.info("✅ Weights loaded into custom model successfully.")
+
+        # 5. Move to device and set to evaluation mode
+        model.to(MODEL_LOAD_DEVICE)
+        model.eval()
+        logger.info("✅ Model is fully loaded and ready on the target device.")
+
+    except Exception as e:
+        logger.error(f"❌ FATAL: An error occurred during model loading: {e}", exc_info=True)
+        # Set model to None to ensure API returns "not ready"
+        model = None
+        tokenizer = None
+
+    yield
+
+    # --- Code below this line runs on shutdown ---
+    logger.info("API Shutting down.")
+    model = None
+    tokenizer = None
+
+# --- Initialize FastAPI ---
+app = FastAPI(
+    title="Rx Codex V1-Tiny-V3 API",
+    description="An API for generating text with the Rx_Codex_V1_Tiny_V3 model.",
+    lifespan=lifespan
+)
+
+# --- Pydantic Models for API Data Validation ---
+class GenerationRequest(BaseModel):
+    prompt: str
+    max_new_tokens: int = 150
+    temperature: float = 0.7
+    top_k: int = 50
+
+class GenerationResponse(BaseModel):
+    generated_text: str
+
+# --- API Endpoints ---
+@app.get("/")
+def root():
+    """A simple endpoint to check if the API is running."""
+    status = "loaded" if model and tokenizer else "not loaded"
+    return {"message": "Rx Codex V1-Tiny-V3 API is running", "model_status": status}
+
+@app.post("/generate", response_model=GenerationResponse)
+async def generate_text(request: GenerationRequest):
+    """The main endpoint to generate text from a prompt."""
+    if not model or not tokenizer:
+        raise HTTPException(status_code=503, detail="Model is not ready. Please try again later.")
+
+    logger.info(f"Received generation request for prompt: '{request.prompt}'")
+
+    formatted_prompt = f"### Human:\n{request.prompt}\n\n### Assistant:"
+    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(MODEL_LOAD_DEVICE)
+
+    # --- NOTE: Our custom model does not have a .generate() method ---
+    # We must use our manual generation loop
+    output_ids = inputs["input_ids"]
+    with torch.no_grad():
+        for _ in range(request.max_new_tokens):
+            outputs = model(output_ids)
+            next_token_logits = outputs['logits'][:, -1, :]
+
+            # Apply temperature
+            if request.temperature > 0:
+                next_token_logits = next_token_logits / request.temperature
+
+            # Apply top-k
+            if request.top_k > 0:
+                v, _ = torch.topk(next_token_logits, min(request.top_k, next_token_logits.size(-1)))
+                next_token_logits[next_token_logits < v[:, [-1]]] = -float('Inf')
+
+            probs = torch.nn.functional.softmax(next_token_logits, dim=-1)
+            next_token_id = torch.multinomial(probs, num_samples=1)
+
+            # Stop if EOS token is generated
+            if next_token_id == tokenizer.eos_token_id:
+                break
+
+            output_ids = torch.cat((output_ids, next_token_id), dim=1)
+
+    full_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    generated_text = full_text[len(formatted_prompt):].strip()
+
+    logger.info("Generation complete.")
+    return GenerationResponse(generated_text=generated_text)
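
For reference, a hedged client sketch for the `/generate` endpoint; the URL assumes the local Docker setup above, and the prompt and sampling values are illustrative (the Pydantic defaults are 150 tokens, temperature 0.7, top-k 50):

# client_example.py - hypothetical caller of the /generate endpoint
import json
import urllib.request

payload = {
    "prompt": "What is a tokenizer?",
    "max_new_tokens": 64,
    "temperature": 0.7,
    "top_k": 50,
}
req = urllib.request.Request(
    "http://localhost:7860/generate",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["generated_text"])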
modeling_rx_codex_v3.py
ADDED
@@ -0,0 +1,104 @@
+# modeling_rx_codex_v3.py
+
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+from transformers import GPT2Config, PreTrainedModel
+from transformers.modeling_outputs import CausalLMOutputWithPast
+
+# --- Helper Functions for RoPE ---
+def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0):
+    freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim))
+    t = torch.arange(end, device=freqs.device)
+    freqs = torch.outer(t, freqs).float()
+    freqs_cis = torch.polar(torch.ones_like(freqs), freqs)
+    return freqs_cis
+
+def apply_rotary_emb(xq: torch.Tensor, xk: torch.Tensor, freqs_cis: torch.Tensor):
+    xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2))
+    xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
+    freqs_cis = freqs_cis.unsqueeze(0).unsqueeze(0)
+    xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3)
+    xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
+    return xq_out.type_as(xq), xk_out.type_as(xk)
+
+# --- Model Modules ---
+class CausalSelfAttention(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        assert config.n_embd % config.n_head == 0
+        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd)
+        self.c_proj = nn.Linear(config.n_embd, config.n_embd)
+        self.n_head = config.n_head
+        self.n_embd = config.n_embd
+    def forward(self, x, freqs_cis):
+        B, T, C = x.size()
+        q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+        k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+        q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+        v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
+        q, k = apply_rotary_emb(q, k, freqs_cis=freqs_cis)
+        y = F.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=0.0, is_causal=True)
+        y = y.transpose(1, 2).contiguous().view(B, T, C)
+        y = self.c_proj(y)
+        return y
+
+class SwiGLU_MLP(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        hidden_dim = int(2/3 * 4 * config.n_embd)
+        hidden_dim = (hidden_dim + 127) // 128 * 128
+        self.w1 = nn.Linear(config.n_embd, hidden_dim, bias=False)
+        self.w3 = nn.Linear(config.n_embd, hidden_dim, bias=False)
+        self.w2 = nn.Linear(hidden_dim, config.n_embd, bias=False)
+    def forward(self, x):
+        return self.w2(F.silu(self.w1(x)) * self.w3(x))
+
+class Block(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.ln_1 = nn.LayerNorm(config.n_embd)
+        self.attn = CausalSelfAttention(config)
+        self.ln_2 = nn.LayerNorm(config.n_embd)
+        self.mlp = SwiGLU_MLP(config)
+    def forward(self, x, freqs_cis):
+        x = x + self.attn(self.ln_1(x), freqs_cis=freqs_cis)
+        x = x + self.mlp(self.ln_2(x))
+        return x
+
+# --- Main Model Class ---
+class Rx_Codex_V3_Custom_Model_Class(PreTrainedModel):
+    config_class = GPT2Config
+
+    def __init__(self, config):
+        super().__init__(config)
+        self.config = config
+        self.transformer = nn.ModuleDict(dict(
+            wte = nn.Embedding(config.vocab_size, config.n_embd),
+            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
+            ln_f = nn.LayerNorm(config.n_embd),
+        ))
+        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
+        self.transformer.wte.weight = self.lm_head.weight
+        head_dim = config.n_embd // config.n_head
+        freqs_cis = precompute_freqs_cis(head_dim, self.config.n_positions * 2)
+        self.register_buffer("freqs_cis", freqs_cis)
+
+    def forward(self, input_ids, labels=None, **kwargs):
+        b, t = input_ids.size()
+        tok_emb = self.transformer.wte(input_ids)
+        x = tok_emb
+        freqs_cis = self.freqs_cis[:t]
+        for block in self.transformer.h:
+            x = block(x, freqs_cis=freqs_cis)
+        x = self.transformer.ln_f(x)
+        logits = self.lm_head(x)
+
+        loss = None
+        if labels is not None:
+            shift_logits = logits[..., :-1, :].contiguous()
+            shift_labels = labels[..., 1:].contiguous()
+            loss_fct = nn.CrossEntropyLoss()
+            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
+
+        return CausalLMOutputWithPast(loss=loss, logits=logits)
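
The class can be exercised standalone before wiring it into the API. The tiny config below is an illustrative assumption (any values work as long as `n_embd` is divisible by `n_head` and the resulting head dimension is even, which the RoPE complex reshape requires):

# sanity_check.py - minimal sketch instantiating the custom model
# with a toy, made-up config (values are assumptions for illustration)
import torch
from transformers import GPT2Config
from modeling_rx_codex_v3 import Rx_Codex_V3_Custom_Model_Class

config = GPT2Config(
    vocab_size=100,  # toy vocabulary for the test
    n_positions=64,
    n_embd=64,       # divisible by n_head
    n_layer=2,
    n_head=4,        # head_dim = 64 // 4 = 16, even as RoPE needs
)
model = Rx_Codex_V3_Custom_Model_Class(config)
model.eval()

input_ids = torch.randint(0, config.vocab_size, (1, 8))
with torch.no_grad():
    out = model(input_ids)
print(out.logits.shape)  # expected: torch.Size([1, 8, 100])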
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+fastapi
+uvicorn
+# Use the CPU version of torch for wider compatibility on free hardware
+torch --index-url https://download.pytorch.org/whl/cpu
+transformers
+sentencepiece
+accelerate
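
Once installed, a quick check confirms the CPU wheel was actually resolved: the CPU-only build of torch carries a `+cpu` version suffix and reports no CUDA runtime.

# verify_cpu_torch.py - quick check that the CPU-only torch wheel is in use
import torch
print(torch.__version__)          # e.g. "2.3.1+cpu" for the CPU wheel
print(torch.version.cuda)         # None on CPU-only builds
print(torch.cuda.is_available())  # False on the free CPU hardware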