Spaces: Runtime error
```python
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the model and tokenizer from Hugging Face.
# Caution: a 34B-parameter checkpoint needs tens of GB of memory just to
# load, so on the default free CPU hardware of a Space this line alone
# can crash the app (see the notes below the script).
model_name = "Hastika/codellama-CodeLlama-34b-Instruct-hf"  # Adjust if necessary
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create a pipeline for text generation
client = pipeline("text-generation", model=model, tokenizer=tokenizer)

# System prompt
system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. You reply with efficient answers.",
}

# Chat function (an async generator: Gradio streams whatever it yields)
async def chat_groq(message, history):
    messages = [system_prompt]

    # Add the conversation history; this assumes the pair-style history
    # ([user, assistant] tuples) that gr.ChatInterface passes by default.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": str(user_msg)})
        messages.append({"role": "assistant", "content": str(assistant_msg)})

    # Add the new user message
    messages.append({"role": "user", "content": str(message)})

    # Flatten the conversation into a single prompt string for the model
    conversation = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)

    # Generate a response. max_new_tokens bounds only the reply (max_length
    # counts the prompt too), and return_full_text=False strips the prompt
    # so the user is not shown the whole conversation echoed back.
    response_content = client(
        conversation,
        max_new_tokens=1024,
        do_sample=True,
        return_full_text=False,
    )[0]["generated_text"]

    yield response_content

# Gradio interface
with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.ChatInterface(
        chat_groq,
        # These kwargs exist in Gradio 4.x; Gradio 5 removed them, so drop
        # them (or pin gradio==4.*) if this call raises a TypeError.
        clear_btn=None,
        undo_btn=None,
        retry_btn=None,
    )

demo.queue()
demo.launch()
```
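The "Runtime error" itself is most likely the model load rather than anything in the chat logic: a 34B-parameter checkpoint weighs roughly 68 GB in float16, far beyond what a free CPU Space can hold. Below is a minimal sketch of a 4-bit load with bitsandbytes, assuming the Space is upgraded to GPU hardware (even in 4-bit the model needs about 20 GB of VRAM, so a 24 GB GPU at minimum) and that `bitsandbytes` is listed in `requirements.txt`:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "Hastika/codellama-CodeLlama-34b-Instruct-hf"

# Quantize the weights to 4-bit on load; compute still runs in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # place layers on the available GPU(s)
)
```

If no GPU is available, the realistic options are a much smaller model (e.g. a 7B variant) or not hosting the weights in the Space at all, as sketched further below.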
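Separately, the hand-rolled `"role: content"` prompt string does not match the `[INST]`-style format the CodeLlama-Instruct models were trained on, so replies may ramble or echo the role labels. A minimal sketch using the tokenizer's chat template instead, assuming this checkpoint ships one (the upstream CodeLlama-Instruct tokenizers do); it drops in for the `"\n".join(...)` and generation lines inside `chat_groq`:

```python
# Let the tokenizer render `messages` in the model's native chat format.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return a prompt string, not token IDs
    add_generation_prompt=True,  # end where the assistant's turn begins
)
response_content = client(
    prompt,
    max_new_tokens=1024,
    do_sample=True,
    return_full_text=False,
)[0]["generated_text"]
```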
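Finally, an alternative that sidesteps the memory problem entirely: call the model through the hosted Inference API with `huggingface_hub.InferenceClient`, so the Space never loads weights locally. This is a sketch, not a drop-in fix: it assumes an `HF_TOKEN` secret is configured on the Space and that the chosen checkpoint is actually served by an inference provider, which must be verified per model (a personal fork typically is not, which is why the upstream `codellama/CodeLlama-34b-Instruct-hf` appears here).

```python
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Assumption: this upstream checkpoint is reachable via the Inference API.
client = InferenceClient(
    model="codellama/CodeLlama-34b-Instruct-hf",
    token=os.environ.get("HF_TOKEN"),
)

system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. You reply with efficient answers.",
}

def chat_remote(message, history):
    # Rebuild the message list in the same shape as the original app.
    messages = [system_prompt]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": str(user_msg)})
        messages.append({"role": "assistant", "content": str(assistant_msg)})
    messages.append({"role": "user", "content": str(message)})

    # chat_completion formats the roles server-side; no manual prompt string.
    response = client.chat_completion(messages, max_tokens=1024)
    return response.choices[0].message.content

gr.ChatInterface(chat_remote).launch()
```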