Spaces:

Sidharthan
/

Scriptr

Runtime error

App Files Files Community

Sidharthan committed on Nov 4, 2024

Commit

9d2f477

1 Parent(s): 301f7c4

Added the application files, including the inference endpoints and configs

Browse files

Files changed (3) hide show

Dockerfile +28 -0
app.py +78 -0
requirements.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,28 @@

+# CPU-only base image; use a CUDA-compatible base image if you need GPU support
+FROM python:3.9-slim
+# Hugging Face Docker Spaces run the container as UID 1000, so create that
+# user up front; its home directory gives the app a writable location
+RUN useradd -m -u 1000 user
+# Point HOME and the Hugging Face cache at the user's home so the model
+# download performed when app.py is imported does not target a root-owned path
+ENV HOME=/home/user \
+    HF_HOME=/home/user/.cache/huggingface
+# Set working directory
+WORKDIR /app
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first to leverage Docker cache
+COPY requirements.txt .
+# Install Python dependencies (still root here, so they land in the system site-packages)
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Copy the application code, owned by the runtime user
+COPY --chown=user . .
+# Drop root privileges for the running service
+USER user
+# Expose the port the app runs on
+EXPOSE 7860
+# Command to run the application
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+# app.py
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import AutoTokenizer
+from peft import AutoPeftModelForCausalLM
+import torch
+from typing import Optional
+app = FastAPI(title="Gemma Script Generator API")
+# Load model and tokenizer
+MODEL_NAME = "Sidharthan/gemma2_scripter"
+try:
+    tokenizer = AutoTokenizer.from_pretrained(
+        MODEL_NAME,
+        trust_remote_code=True
+    )
+    model = AutoPeftModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        device_map="auto",  # Will use CPU if GPU not available
+        trust_remote_code=True,
+        #load_in_4bit=True
+    )
+except Exception as e:
+    print(f"Error loading model: {str(e)}")
+    raise
+# Request body for POST /generate. Every field except `message` has a default;
+# generate_script forwards each setting to model.generate() under the same name.
+class GenerationRequest(BaseModel):
+    # Prompt text; tokenized as-is, with no template applied
+    message: str
+    # Passed to model.generate() as `max_length`
+    max_length: Optional[int] = 512
+    # Sampling settings (generate_script always calls with do_sample=True)
+    temperature: Optional[float] = 0.7
+    top_p: Optional[float] = 0.95
+    top_k: Optional[int] = 50
+    # Passed to model.generate() as `repetition_penalty`
+    repetition_penalty: Optional[float] = 1.2
+# Response body for POST /generate.
+class GenerationResponse(BaseModel):
+    # Decoded first output sequence, with special tokens skipped
+    generated_text: str
+@app.post("/generate", response_model=GenerationResponse)
+async def generate_script(request: GenerationRequest):
+    try:
+        # Format prompt
+        prompt = request.message
+        # Tokenize input
+        inputs = tokenizer(prompt, return_tensors="pt")
+        if torch.cuda.is_available():
+            inputs = {k: v.cuda() for k, v in inputs.items()}
+        # Generate
+        outputs = model.generate(
+            **inputs,
+            max_length=request.max_length,
+            do_sample=True,
+            temperature=request.temperature,
+            top_p=request.top_p,
+            top_k=request.top_k,
+            repetition_penalty=request.repetition_penalty,
+            num_return_sequences=1,
+            pad_token_id=tokenizer.pad_token_id,
+            eos_token_id=tokenizer.eos_token_id
+        )
+        # Decode output
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return GenerationResponse(generated_text=generated_text)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy"}
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi
+uvicorn[standard]
+torch
+transformers
+peft
+pydantic
+bitsandbytes
+accelerate