#!/usr/bin/env python3
"""
Hugging Face Inference API App for DevOps SLM
This creates a Gradio interface for your DevOps model
"""
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import logging
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class DevOpsSLMInference:
    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_name = "lakhera2023/devops-slm"
    def load_model(self):
        """Load the DevOps SLM model"""
        try:
            logger.info(f"Loading model: {self.model_name}")
            logger.info(f"Using device: {self.device}")
            # Load tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            # Load model: fp16 halves GPU memory, while CPU stays in fp32 since
            # half precision is slow or unsupported on most CPUs;
            # device_map="auto" lets accelerate place the weights on the GPU
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True,
                trust_remote_code=True
            )
            # Set pad token if not present
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
            logger.info("Model loaded successfully!")
            return True
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            return False
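    # The cleanup step in generate_response strips ChatML markers
    # (<|im_start|>/<|im_end|>), which suggests the model was trained on
    # ChatML-formatted data. If the tokenizer bundles a chat template, routing
    # prompts through it may improve instruction-following. A minimal sketch,
    # not wired into generate_response by default:
    def format_chat_prompt(self, user_prompt):
        """Optionally wrap a raw prompt in the tokenizer's chat template."""
        if getattr(self.tokenizer, "chat_template", None):
            messages = [{"role": "user", "content": user_prompt}]
            return self.tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
        return user_prompt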
    def generate_response(self, prompt, max_tokens=200, temperature=0.7, top_p=0.9, top_k=50):
        """Generate a response using the DevOps SLM"""
        if self.model is None or self.tokenizer is None:
            if not self.load_model():
                return "Error: Could not load model"
        try:
            # Tokenize input (truncate long prompts to leave room for generation)
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = {k: v.to(self.device) for k, v in inputs.items()}
            # Generate response
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    top_k=top_k,
                    do_sample=True,
                    num_return_sequences=1,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1,  # mild penalty to curb verbatim loops
                    no_repeat_ngram_size=2
                )
            # Decode only the newly generated tokens; decoding the full sequence
            # and string-stripping the prompt is brittle when tokenization
            # alters whitespace
            generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
            response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
            # Strip any ChatML template artifacts the model may still emit
            response = response.replace("<|im_start|>", "").replace("<|im_end|>", "").strip()
            return response
        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return f"Error: {str(e)}"
# Initialize the inference class
devops_slm = DevOpsSLMInference()
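# Note: the model loads lazily on the first request (see generate_response),
# which keeps startup fast; calling devops_slm.load_model() here instead would
# pay the download/load cost at boot and surface failures early.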
# Example prompts for DevOps scenarios
example_prompts = [
    "How do I deploy a microservice to Kubernetes?",
    "What are the best practices for container security?",
    "How can I monitor application performance in production?",
    "Explain the difference between Docker and Kubernetes",
    "What is CI/CD and how do I implement it?",
    "Create a Kubernetes deployment YAML for a web application",
    "How do I set up a Docker multi-stage build?",
    "What are the key components of a DevOps pipeline?"
]
def create_gradio_interface():
"""Create the Gradio interface"""
with gr.Blocks(
title="DevOps SLM - Specialized Language Model",
theme=gr.themes.Soft(),
css="""
.gradio-container {
max-width: 1200px !important;
}
"""
) as demo:
gr.Markdown("""
# πŸš€ DevOps Specialized Language Model
A specialized AI model trained for DevOps tasks, Kubernetes operations, Docker containerization,
CI/CD pipelines, and infrastructure management.
**Model:** [lakhera2023/devops-slm](https://huggingface.co/lakhera2023/devops-slm)
""")
        with gr.Row():
            with gr.Column(scale=2):
                prompt_input = gr.Textbox(
                    label="DevOps Question or Task",
                    placeholder="Ask me anything about DevOps, Kubernetes, Docker, CI/CD, or infrastructure...",
                    lines=3
                )
                with gr.Row():
                    max_tokens = gr.Slider(
                        minimum=50, maximum=500, value=200, step=10,
                        label="Max Tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                        label="Temperature"
                    )
                with gr.Row():
                    top_p = gr.Slider(
                        minimum=0.1, maximum=1.0, value=0.9, step=0.05,
                        label="Top-p"
                    )
                    top_k = gr.Slider(
                        minimum=1, maximum=100, value=50, step=1,
                        label="Top-k"
                    )
                generate_btn = gr.Button("Generate Response", variant="primary", size="lg")
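                # Sampling knobs: lower temperature/top-p give more deterministic
                # output (often better for YAML/config generation); higher values
                # give more varied prose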
            with gr.Column(scale=1):
                gr.Markdown("### 📝 Example Prompts")
                for example in example_prompts[:4]:
                    gr.Button(
                        example,
                        size="sm"
                    ).click(
                        # the default argument binds the current loop value, so
                        # each button fills the textbox with its own prompt
                        lambda x=example: x,
                        outputs=prompt_input
                    )
        with gr.Row():
            output = gr.Textbox(
                label="DevOps Response",
                lines=10,
                show_copy_button=True
            )
        # Event handlers
        generate_btn.click(
            fn=devops_slm.generate_response,
            inputs=[prompt_input, max_tokens, temperature, top_p, top_k],
            outputs=output
        )
        # Allow Enter key to generate
        prompt_input.submit(
            fn=devops_slm.generate_response,
            inputs=[prompt_input, max_tokens, temperature, top_p, top_k],
            outputs=output
        )
gr.Markdown("""
### 🎯 Model Capabilities
- **Kubernetes Operations**: Pod management, deployments, services, configmaps, secrets
- **Docker Containerization**: Container creation, optimization, and best practices
- **CI/CD Pipeline Management**: Pipeline design, automation, and troubleshooting
- **Infrastructure Automation**: Infrastructure as Code, provisioning, scaling
- **Monitoring and Observability**: Logging, metrics, alerting, debugging
- **Cloud Platform Operations**: Multi-cloud deployment and management
### πŸ“Š Model Details
- **Base Architecture**: Qwen2-0.5B (494M parameters)
- **Specialization**: DevOps, Kubernetes, Docker, CI/CD, Infrastructure
- **Max Sequence Length**: 2048 tokens
- **Model Type**: Instruction-tuned for DevOps domain
""")
return demo
if __name__ == "__main__":
    # Create and launch the interface
    demo = create_gradio_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True  # share links only matter for local runs; Spaces ignores this
    )
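# Once the app is running, it can also be queried programmatically with
# gradio_client. A sketch, assuming the Space id matches this repo
# ("lakhera2023/devops-slm-chat"); the endpoint name is auto-derived from the
# handler's function name, so confirm it with client.view_api():
#
#   from gradio_client import Client
#   client = Client("lakhera2023/devops-slm-chat")
#   result = client.predict(
#       "How do I deploy a microservice to Kubernetes?",  # prompt
#       200,   # max tokens
#       0.7,   # temperature
#       0.9,   # top-p
#       50,    # top-k
#       api_name="/generate_response",
#   )
#   print(result)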