Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Hugging Face Inference API App for DevOps SLM | |
| This creates a Gradio interface for your DevOps model | |
| """ | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import logging | |
| # Configure logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
class DevOpsSLMInference:
    """Lazy-loading inference wrapper around the DevOps SLM checkpoint.

    The model and tokenizer are only loaded on the first call to
    ``generate_response`` so that importing this module stays cheap.
    """

    def __init__(self):
        # Populated lazily by load_model(); None means "not loaded yet".
        self.model = None
        self.tokenizer = None
        # Prefer GPU when available; dtype/device_map below key off this.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_name = "lakhera2023/devops-slm"

    def load_model(self):
        """Load the DevOps SLM model and tokenizer from the Hugging Face Hub.

        Returns:
            bool: True if both loaded successfully, False otherwise
            (the error is logged rather than raised so the UI can show
            a friendly message).
        """
        try:
            # Lazy %-style args: formatting is skipped if the level is disabled.
            logger.info("Loading model: %s", self.model_name)
            logger.info("Using device: %s", self.device)

            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)

            # fp16 + accelerate's device_map only make sense on GPU; on CPU
            # stick to fp32 and plain placement.
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto" if self.device == "cuda" else None,
                low_cpu_mem_usage=True,
                trust_remote_code=True,
            )

            # Some causal-LM tokenizers ship without a pad token; generation
            # below passes pad_token_id explicitly, so make sure one exists.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            logger.info("Model loaded successfully!")
            return True
        except Exception as e:
            logger.error("Error loading model: %s", e)
            return False

    def generate_response(self, prompt, max_tokens=200, temperature=0.7, top_p=0.9, top_k=50):
        """Generate a response using the DevOps SLM.

        Args:
            prompt: User question/task text.
            max_tokens: Maximum number of NEW tokens to generate.
            temperature: Sampling temperature (higher = more random).
            top_p: Nucleus-sampling probability mass.
            top_k: Top-k sampling cutoff.

        Returns:
            str: The generated continuation only (prompt removed), or an
            ``"Error: ..."`` string on failure.
        """
        # Lazy load on first use; bail out with a readable message on failure.
        if self.model is None or self.tokenizer is None:
            if not self.load_model():
                return "Error: Could not load model"
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    top_k=top_k,
                    do_sample=True,
                    num_return_sequences=1,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1,
                    no_repeat_ngram_size=2,
                )

            # BUGFIX: decode only the newly generated tokens instead of
            # decoding the full sequence and string-stripping the prompt.
            # skip_special_tokens can normalize the decoded prefix so that
            # ``response.startswith(prompt)`` fails and the prompt leaks
            # into the output; slicing by token count is always correct.
            prompt_length = inputs["input_ids"].shape[1]
            generated_tokens = outputs[0][prompt_length:]
            response = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)

            # Defensive cleanup of chat-template markers that may survive
            # decoding with trust_remote_code tokenizers.
            response = response.replace("<|im_start|>", "").replace("<|im_end|>", "").strip()
            return response
        except Exception as e:
            logger.error("Error generating response: %s", e)
            return f"Error: {str(e)}"
# Initialize the inference class
# Module-level singleton; the underlying model is loaded lazily on the
# first generate_response() call, so construction here is cheap.
devops_slm = DevOpsSLMInference()

# Example prompts for DevOps scenarios
# NOTE: only the first four are surfaced as quick-fill buttons in the UI
# (see create_gradio_interface, which slices example_prompts[:4]).
example_prompts = [
    "How do I deploy a microservice to Kubernetes?",
    "What are the best practices for container security?",
    "How can I monitor application performance in production?",
    "Explain the difference between Docker and Kubernetes",
    "What is CI/CD and how do I implement it?",
    "Create a Kubernetes deployment YAML for a web application",
    "How do I set up a Docker multi-stage build?",
    "What are the key components of a DevOps pipeline?"
]
def create_gradio_interface():
    """Build and return the Gradio Blocks UI for the DevOps SLM demo.

    Wires the prompt textbox, sampling sliders, and example-prompt buttons
    to ``devops_slm.generate_response``.

    Returns:
        gr.Blocks: The assembled (but not yet launched) demo.
    """
    with gr.Blocks(
        title="DevOps SLM - Specialized Language Model",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        """
    ) as demo:
        # NOTE(review): the "π" glyphs below look like mojibake'd emoji
        # (likely 🚀/📋/🎯/📊 originally) — confirm and re-encode if desired.
        gr.Markdown("""
        # π DevOps Specialized Language Model
        A specialized AI model trained for DevOps tasks, Kubernetes operations, Docker containerization,
        CI/CD pipelines, and infrastructure management.
        **Model:** [lakhera2023/devops-slm](https://huggingface.co/lakhera2023/devops-slm)
        """)

        with gr.Row():
            with gr.Column(scale=2):
                prompt_input = gr.Textbox(
                    label="DevOps Question or Task",
                    placeholder="Ask me anything about DevOps, Kubernetes, Docker, CI/CD, or infrastructure...",
                    lines=3
                )
                with gr.Row():
                    max_tokens = gr.Slider(
                        minimum=50, maximum=500, value=200, step=10,
                        label="Max Tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                        label="Temperature"
                    )
                with gr.Row():
                    top_p = gr.Slider(
                        minimum=0.1, maximum=1.0, value=0.9, step=0.05,
                        label="Top-p"
                    )
                    top_k = gr.Slider(
                        minimum=1, maximum=100, value=50, step=1,
                        label="Top-k"
                    )
                generate_btn = gr.Button("Generate Response", variant="primary", size="lg")

            with gr.Column(scale=1):
                gr.Markdown("### π Example Prompts")
                # Only the first four examples get quick-fill buttons.
                # The index from enumerate was unused, so iterate directly.
                for example in example_prompts[:4]:
                    gr.Button(
                        example,
                        size="sm"
                    ).click(
                        # Default-arg binding (x=example) captures the CURRENT
                        # example per iteration, avoiding the classic
                        # late-binding closure bug.
                        lambda x=example: x,
                        outputs=prompt_input
                    )

        with gr.Row():
            output = gr.Textbox(
                label="DevOps Response",
                lines=10,
                show_copy_button=True
            )

        # Event handlers: button click and textbox Enter both generate.
        generate_btn.click(
            fn=devops_slm.generate_response,
            inputs=[prompt_input, max_tokens, temperature, top_p, top_k],
            outputs=output
        )
        prompt_input.submit(
            fn=devops_slm.generate_response,
            inputs=[prompt_input, max_tokens, temperature, top_p, top_k],
            outputs=output
        )

        gr.Markdown("""
        ### π― Model Capabilities
        - **Kubernetes Operations**: Pod management, deployments, services, configmaps, secrets
        - **Docker Containerization**: Container creation, optimization, and best practices
        - **CI/CD Pipeline Management**: Pipeline design, automation, and troubleshooting
        - **Infrastructure Automation**: Infrastructure as Code, provisioning, scaling
        - **Monitoring and Observability**: Logging, metrics, alerting, debugging
        - **Cloud Platform Operations**: Multi-cloud deployment and management
        ### π Model Details
        - **Base Architecture**: Qwen2-0.5B (494M parameters)
        - **Specialization**: DevOps, Kubernetes, Docker, CI/CD, Infrastructure
        - **Max Sequence Length**: 2048 tokens
        - **Model Type**: Instruction-tuned for DevOps domain
        """)
    return demo
if __name__ == "__main__":
    # Build the UI, then serve it on all interfaces at port 7860 with a
    # public share link — same launch configuration, expressed as a dict.
    app = create_gradio_interface()
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    app.launch(**launch_options)