KoKoDanio committed
Commit b49a99c · 1 Parent(s): d09a01e

1st commit

Files changed (3)
  1. DockerFile +22 -0
  2. app.py +91 -0
  3. requirements.txt +6 -0
DockerFile ADDED
@@ -0,0 +1,22 @@
+ # Use a base image with Python
+ FROM python:3.10-slim
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Copy the requirements file and install dependencies
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Install the correct unsloth build (pinned to the same spec as requirements.txt)
+ # This command is crucial for proper GPU setup
+ RUN pip install "unsloth[cu121-ampere-torch230] @ git+https://github.com/unslothai/unsloth.git@August-2025"
+
+ # Copy the rest of your application code
+ COPY . .
+
+ # Expose the port your application will run on
+ EXPOSE 8000
+
+ # Command to run your application using Uvicorn
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
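To try the image locally, something like the following should work (a sketch: the cyber-llama-api tag is an arbitrary example name, -f DockerFile matches the file name used in this commit, and --gpus all assumes the NVIDIA Container Toolkit is installed):

docker build -f DockerFile -t cyber-llama-api .
docker run --gpus all -p 8000:8000 cyber-llama-api

On Hugging Face Spaces the build and launch happen automatically, so these commands are only needed for local testing.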
app.py ADDED
@@ -0,0 +1,91 @@
+ import torch
+ from fastapi import FastAPI
+ from pydantic import BaseModel, Field
+ from unsloth import FastLanguageModel
+ from transformers import AutoTokenizer, StoppingCriteria, StoppingCriteriaList
+
+ # Initialize FastAPI app
+ app = FastAPI(title="Llama-3.1 Finetuned API", version="1.0.0")
+
+ # --- Model Loading ---
+ try:
+     lora_adapter_path = "cyber_llama"
+     model, tokenizer = FastLanguageModel.from_pretrained(
+         model_name=lora_adapter_path,
+         max_seq_length=2048,
+         load_in_4bit=True,
+     )
+     FastLanguageModel.for_inference(model)
+ except Exception as e:
+     print(f"Error loading model: {e}")
+     # Set to None to handle errors gracefully in the API endpoint
+     model = None
+     tokenizer = None
+
+ # Pydantic model for the request body
+ class PromptRequest(BaseModel):
+     prompt: str = Field(..., description="The user's prompt or instruction for the model.")
+     max_new_tokens: int = Field(512, ge=1, description="Maximum number of tokens to generate.")
+     stop_sequences: list[str] = Field([".", "!", "?"], description="A list of strings that will stop the generation.")
+
+ # A custom stopping criterion that halts generation on the requested stop tokens
+ class StopOnTokens(StoppingCriteria):
+     def __init__(self, stop_token_ids):
+         super().__init__()
+         self.stop_token_ids = stop_token_ids
+
+     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+         return any(input_ids[0][-1] == token_id for token_id in self.stop_token_ids)
+
+ # API endpoint for text generation
+ @app.post("/generate", summary="Generates text based on a given prompt")
+ async def generate(request: PromptRequest):
+     if not model or not tokenizer:
+         return {"error": "Model not loaded. Please check the server logs."}
+
+     # The prompt template for the model
+     alpaca_prompt = """You are a trustworthy cybersecurity and privacy assistant that provides clear, safe, and practical guidance on protecting data, avoiding threats, and staying secure online.
+
+ ### Instruction:
+ Analyse the user input and answer the question carefully. Please try to obey the cybersecurity and privacy laws.
+
+ ### Input:
+ {}
+
+ ### Response:
+ {}"""
+
+     inputs = tokenizer(
+         [
+             alpaca_prompt.format(
+                 request.prompt,  # input from the user
+                 "",  # empty response to be filled by the model
+             )
+         ],
+         return_tensors="pt"
+     ).to("cuda")
+
+     # Convert the stop sequences to token IDs (this assumes each stop string is a single token in the vocabulary)
+     stop_token_ids = tokenizer.convert_tokens_to_ids(request.stop_sequences)
+
+     # Create the stopping criteria list
+     stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_token_ids)])
+
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=request.max_new_tokens,
+         use_cache=True,
+         do_sample=True,  # sampling gives more varied responses
+         stopping_criteria=stopping_criteria
+     )
+
+     # Decode only the newly generated tokens, skipping the prompt
+     generated_text = tokenizer.batch_decode(outputs[:, inputs['input_ids'].shape[1]:], skip_special_tokens=True)[0]
+
+     return {"generated_text": generated_text}
+
+ # This section is for local testing and will not be run on Hugging Face Spaces
+ if __name__ == "__main__":
+     import uvicorn
+     # Make sure to include the ngrok setup for local testing on Colab
+     uvicorn.run(app, host="0.0.0.0", port=8000)
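Once the container is running, the /generate endpoint can be exercised with a plain HTTP request (a minimal sketch; the prompt text, localhost address, and token budget are example values):

curl -X POST http://localhost:8000/generate \
  -H "Content-Type: application/json" \
  -d '{"prompt": "How can I spot a phishing email?", "max_new_tokens": 256}'

A successful call returns a JSON object with a generated_text field; if the model failed to load at startup, the endpoint returns an error key instead.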
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch
+ transformers
+ unsloth[cu121-ampere-torch230] @ git+https://github.com/unslothai/unsloth.git@August-2025
+ streamlit
+ fastapi
+ uvicorn