# devops-slm-chat / app.py
# Hugging Face Space by lakhera2023 (revision 75385b9).
import sys
import os
import json
import requests
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
import uvicorn
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import logging
# Configure logging once for the whole process; module-level logger per convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI app for hosting the Gradio interface
app = FastAPI(title="DevOps SLM Interface")

# Your Hugging Face endpoint details
HF_ENDPOINT_URL = "https://bcg2lrpnfylqamcz.us-east-1.aws.endpoints.huggingface.cloud"
HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Must be set in Space environment variables

# User-friendly API base URL
# NOTE(review): API_BASE_URL is not referenced anywhere in this file — confirm before removing.
API_BASE_URL = "https://lakhera2023-devops-slm-chat.hf.space"
# Pydantic models for OpenAI API
class CompletionRequest(BaseModel):
    """Request schema mirroring the OpenAI ``/v1/completions`` payload.

    NOTE(review): no completions route is defined in this file; the model
    appears to be kept for OpenAI-API compatibility — confirm before removing.
    """
    prompt: str
    max_tokens: Optional[int] = 100
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 1.0
    frequency_penalty: Optional[float] = 0.0
    presence_penalty: Optional[float] = 0.0
    stop: Optional[List[str]] = None
    stream: Optional[bool] = False
class ChatMessage(BaseModel):
    """A single chat turn in OpenAI format (``role`` + ``content``)."""
    role: str
    content: str
class ChatCompletionRequest(BaseModel):
    """Request schema mirroring the OpenAI ``/v1/chat/completions`` payload.

    NOTE(review): no chat-completions route is defined in this file; kept for
    OpenAI-API compatibility — confirm before removing.
    """
    messages: List[ChatMessage]
    max_tokens: Optional[int] = 100
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 1.0
    frequency_penalty: Optional[float] = 0.0
    presence_penalty: Optional[float] = 0.0
    stop: Optional[List[str]] = None
    stream: Optional[bool] = False
# DevOps SLM Inference Class (from your existing code)
class DevOpsSLMInference:
    """Lazy wrapper around the ``lakhera2023/devops-slm`` causal LM.

    Weights are not downloaded at construction time; the first call to
    :meth:`generate_response` triggers :meth:`load_model`.
    """

    def __init__(self):
        """Record model metadata only; the model itself loads on demand."""
        self.model = None
        self.tokenizer = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_name = "lakhera2023/devops-slm"

    def load_model(self):
        """Load tokenizer and model weights; return True on success."""
        try:
            logger.info(f"Loading model: {self.model_name}")
            logger.info(f"Using device: {self.device}")
            on_gpu = self.device == "cuda"
            # Tokenizer first, then the model (fp16 + auto device map on GPU).
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
                device_map="auto" if on_gpu else None,
                low_cpu_mem_usage=True,
                trust_remote_code=True,
            )
            # Some checkpoints ship without a pad token; reuse EOS in that case.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
            logger.info("Model loaded successfully!")
            return True
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            return False

    def generate_response(self, prompt, max_tokens=200, temperature=0.7, top_p=0.9, top_k=50):
        """Run sampling-based generation and return the new text as a string.

        On any failure an ``"Error: ..."`` string is returned instead of raising,
        so the Gradio UI always has something to display.
        """
        # Lazy-load the model on first use.
        if self.model is None or self.tokenizer is None:
            if not self.load_model():
                return "Error: Could not load model"
        try:
            encoded = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            encoded = {name: tensor.to(self.device) for name, tensor in encoded.items()}
            with torch.no_grad():
                generated = self.model.generate(
                    **encoded,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    top_k=top_k,
                    do_sample=True,
                    num_return_sequences=1,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1,
                    no_repeat_ngram_size=2,
                )
            text = self.tokenizer.decode(generated[0], skip_special_tokens=True)
            # Generation echoes the prompt; strip it so only new text remains.
            if text.startswith(prompt):
                text = text[len(prompt):].strip()
            # Drop leftover chat-template markers.
            text = text.replace("<|im_start|>", "").replace("<|im_end|>", "").strip()
            return text
        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return f"Error: {str(e)}"
# Initialize the inference class (weights load lazily on the first request)
devops_slm = DevOpsSLMInference()
# Utility functions for OpenAI API
def count_tokens(text: str) -> int:
    """Roughly estimate the number of model tokens in *text*.

    Counts whitespace-separated words and scales by 1.3, a common rough
    words-to-tokens ratio for English — this is an approximation, not a
    real tokenizer.

    Args:
        text: Text to estimate a token count for.

    Returns:
        Non-negative estimate (0 for empty/whitespace-only input).
        Fix: the original returned a float despite the ``-> int``
        annotation; the result is now truncated to int.
    """
    return int(len(text.split()) * 1.3)
def call_hf_endpoint(prompt: str, max_tokens: int = 100) -> Dict[str, Any]:
    """Call the dedicated Hugging Face Inference Endpoint.

    Args:
        prompt: Raw prompt text, sent as the endpoint's ``inputs`` field.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The decoded JSON response from the endpoint.

    Raises:
        HTTPException: 500 when the API token is not configured or the
            HTTP request fails (connection error, timeout, 4xx/5xx).
    """
    # Fix: fail fast with a clear message instead of sending
    # "Authorization: Bearer None" when the Space secret is missing.
    if not HF_API_TOKEN:
        raise HTTPException(
            status_code=500,
            detail="Hugging Face API error: HF_API_TOKEN is not configured",
        )
    headers = {
        "Authorization": f"Bearer {HF_API_TOKEN}",
        "Content-Type": "application/json",
    }
    data = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            "temperature": 0.7,
            "do_sample": True,
            "return_full_text": False,
        },
    }
    try:
        response = requests.post(HF_ENDPOINT_URL, headers=headers, json=data, timeout=60)
        response.raise_for_status()  # surface HTTP 4xx/5xx as RequestException
        return response.json()
    except requests.exceptions.RequestException as e:
        raise HTTPException(status_code=500, detail=f"Hugging Face API error: {str(e)}")
# Simple health check endpoint
@app.get("/health")
async def health_check():
"""Health check endpoint"""
return {"status": "healthy", "model": "devops-slm", "interface": "gradio"}
@app.get("/api")
async def api_info():
    """Point API consumers at the dedicated inference endpoint."""
    info = {
        "message": "DevOps SLM - Use Inference Endpoint for API Access",
        "inference_endpoint": "https://bcg2lrpnfylqamcz.us-east-1.aws.endpoints.huggingface.cloud",
        "ui_url": "https://huggingface.co/spaces/lakhera2023/devops-slm-chat/ui",
        "note": "For API access, use the Hugging Face Inference Endpoint. See README.md for examples.",
    }
    return info
# Gradio interface (from your existing code)
# Canned prompts shown as one-click buttons in the UI.
# Only the first four are rendered (see create_gradio_interface).
example_prompts = [
    "How do I deploy a microservice to Kubernetes?",
    "What are the best practices for container security?",
    "How can I monitor application performance in production?",
    "Explain the difference between Docker and Kubernetes",
    "What is CI/CD and how do I implement it?",
    "Create a Kubernetes deployment YAML for a web application",
    "How do I set up a Docker multi-stage build?",
    "What are the key components of a DevOps pipeline?"
]
def create_gradio_interface():
    """Build and return the Gradio Blocks UI for the DevOps SLM.

    Layout: a prompt textbox with sampling sliders on the left, example-prompt
    buttons on the right, an output textbox below, and static capability docs
    at the bottom. Both the Generate button and pressing Enter in the prompt
    box dispatch to ``devops_slm.generate_response``.

    Returns:
        gr.Blocks: the assembled (un-launched) interface.
    """
    with gr.Blocks(
        title="DevOps SLM - Specialized Language Model",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        """
    ) as demo:
        gr.Markdown("""
        # 🚀 DevOps Specialized Language Model
        A specialized AI model trained for DevOps tasks, Kubernetes operations, Docker containerization,
        CI/CD pipelines, and infrastructure management.
        **Model:** [lakhera2023/devops-slm](https://huggingface.co/lakhera2023/devops-slm)
        """)
        with gr.Row():
            with gr.Column(scale=2):
                prompt_input = gr.Textbox(
                    label="DevOps Question or Task",
                    placeholder="Ask me anything about DevOps, Kubernetes, Docker, CI/CD, or infrastructure...",
                    lines=3
                )
                with gr.Row():
                    max_tokens = gr.Slider(
                        minimum=50, maximum=500, value=200, step=10,
                        label="Max Tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                        label="Temperature"
                    )
                with gr.Row():
                    top_p = gr.Slider(
                        minimum=0.1, maximum=1.0, value=0.9, step=0.05,
                        label="Top-p"
                    )
                    top_k = gr.Slider(
                        minimum=1, maximum=100, value=50, step=1,
                        label="Top-k"
                    )
                generate_btn = gr.Button("Generate Response", variant="primary", size="lg")
            with gr.Column(scale=1):
                gr.Markdown("### 📝 Example Prompts")
                # Fix: enumerate() was used but its index was never read.
                for example in example_prompts[:4]:
                    gr.Button(
                        example,
                        size="sm"
                    ).click(
                        # Default-arg binding captures the current `example`,
                        # avoiding the late-binding closure pitfall.
                        lambda x=example: x,
                        outputs=prompt_input
                    )
        with gr.Row():
            output = gr.Textbox(
                label="DevOps Response",
                lines=10,
                show_copy_button=True
            )
        # Event handlers — button click and Enter-to-submit share one wiring.
        gen_inputs = [prompt_input, max_tokens, temperature, top_p, top_k]
        generate_btn.click(
            fn=devops_slm.generate_response,
            inputs=gen_inputs,
            outputs=output
        )
        prompt_input.submit(
            fn=devops_slm.generate_response,
            inputs=gen_inputs,
            outputs=output
        )
        gr.Markdown("""
        ### 🎯 Model Capabilities
        - **Kubernetes Operations**: Pod management, deployments, services, configmaps, secrets
        - **Docker Containerization**: Container creation, optimization, and best practices
        - **CI/CD Pipeline Management**: Pipeline design, automation, and troubleshooting
        - **Infrastructure Automation**: Infrastructure as Code, provisioning, scaling
        - **Monitoring and Observability**: Logging, metrics, alerting, debugging
        - **Cloud Platform Operations**: Multi-cloud deployment and management
        ### 📊 Model Details
        - **Base Architecture**: Qwen (494M parameters)
        - **Specialization**: DevOps, Kubernetes, Docker, CI/CD, Infrastructure
        - **Max Sequence Length**: 2048 tokens
        - **Model Type**: Instruction-tuned for DevOps domain
        ### 🔌 API Integration
        This model is available via Hugging Face Inference Endpoint:
        - **Inference Endpoint**: `https://bcg2lrpnfylqamcz.us-east-1.aws.endpoints.huggingface.cloud`
        - **Format**: Standard Hugging Face inference API with `inputs` parameter
        - **Documentation**: See README.md for complete API examples
        """)
    return demo
# Create Gradio interface (built at import time so Spaces can discover `demo`)
demo = create_gradio_interface()

# For Hugging Face Spaces, just launch Gradio directly
if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard bind for HF Spaces; share=False because
    # the Space already provides the public URL.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )