| | import os |
| | import gradio as gr |
| | from huggingface_hub import InferenceClient |
| |
|
class CodingAssistant:
    """Chat backend that forwards Python questions to a hosted CodeLlama model.

    A single instance is meant to be shared by the UI, so ``respond`` takes
    its conversational context from the ``history`` argument supplied by the
    caller rather than from state stored on the instance. Storing context on
    a shared instance would mix the conversations of different sessions.
    """

    # How many of the most recent (user, assistant) exchanges go in the prompt.
    MAX_CONTEXT_TURNS = 3

    def __init__(self):
        """Create the inference client and the legacy in-process history list.

        The access token is read from the ``HF_TOKEN`` environment variable;
        ``os.getenv`` yields ``None`` when it is unset.
        """
        self.client = InferenceClient(
            model="codellama/CodeLlama-7b-Instruct-hf",
            token=os.getenv("HF_TOKEN"),
        )
        # Only used when ``respond`` is called with ``history=None``.
        self.chat_history = []

    @staticmethod
    def _history_to_turns(history):
        """Normalize a caller-supplied history into ``(user, assistant)`` tuples.

        Two layouts are accepted: a sequence of two-item pairs, or a flat
        sequence of ``{"role": ..., "content": ...}`` dicts in which each
        assistant entry closes the turn opened by the preceding user entry.
        """
        turns = []
        pending_user = None
        for entry in history:
            if isinstance(entry, dict):
                role = entry.get("role")
                if role == "user":
                    pending_user = entry.get("content")
                elif role == "assistant":
                    turns.append((pending_user, entry.get("content")))
                    pending_user = None
            else:
                # Pairs may arrive as lists; tuples keep the prompt text stable.
                turns.append(tuple(entry))
        return turns

    def respond(self, message, history):
        """Generate the model's reply to ``message``.

        Args:
            message: The user's latest message.
            history: Prior exchanges of the current conversation, as pairs or
                role/content dicts. When ``None``, the instance-level
                ``chat_history`` is used and updated instead, which preserves
                the behaviour of direct callers that do not track history.

        Returns:
            Whatever ``self.client.text_generation`` returns for the prompt.
        """
        use_internal_history = history is None
        if use_internal_history:
            turns = self.chat_history
        else:
            turns = self._history_to_turns(history)

        recent_turns = turns[-self.MAX_CONTEXT_TURNS:]
        context = recent_turns if recent_turns else "None"

        # Assembled line by line so source indentation never leaks into the
        # prompt text sent to the model.
        prompt = "\n".join(
            [
                "<s>[INST] <<SYS>>",
                "You are an expert Python programmer. Provide safe, efficient code solutions.",
                f"Maintain conversation history: {context}",
                f"<</SYS>> {message} [/INST]",
            ]
        )

        response = self.client.text_generation(
            prompt=prompt,
            max_new_tokens=1024,
            temperature=0.2,
            repetition_penalty=1.1,
        )

        if use_internal_history:
            self.chat_history.append((message, response))
        return response
| |
|
| | |
# One backend object for the whole app; its bound ``respond`` method is the
# callback the chat UI invokes for every submitted message.
assistant = CodingAssistant()


# Chat front end: heading text first, then the sample prompts, then styling.
demo = gr.ChatInterface(
    fn=assistant.respond,
    title="Code Expert Assistant",
    description="Ask me complex Python programming questions",
    examples=[
        "How to implement a neural network in PyTorch?",
        "Write a Python decorator for rate limiting",
        "Optimize this pandas code: ...",
    ],
    theme="soft",
    # NOTE(review): None is presumably meant to hide the retry/undo buttons;
    # confirm against the installed Gradio version's ChatInterface signature.
    undo_btn=None,
    retry_btn=None,
)


# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()