Spaces: Runtime error
```python
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the model and tokenizer from Hugging Face.
# Caution: a 34B-parameter checkpoint needs tens of GB of memory just to
# load, so on the default free CPU hardware of a Space this line alone
# can crash the app (see the notes below the script).
model_name = "Hastika/codellama-CodeLlama-34b-Instruct-hf"  # Adjust if necessary
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create a pipeline for text generation
client = pipeline("text-generation", model=model, tokenizer=tokenizer)

# System prompt
system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. You reply with efficient answers.",
}

# Chat function (an async generator: Gradio streams whatever it yields)
async def chat_groq(message, history):
    messages = [system_prompt]

    # Add the conversation history; this assumes the pair-style history
    # ([user, assistant] tuples) that gr.ChatInterface passes by default.
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": str(user_msg)})
        messages.append({"role": "assistant", "content": str(assistant_msg)})

    # Add the new user message
    messages.append({"role": "user", "content": str(message)})

    # Flatten the conversation into a single prompt string for the model
    conversation = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)

    # Generate a response. max_new_tokens bounds only the reply (max_length
    # counts the prompt too), and return_full_text=False strips the prompt
    # so the user is not shown the whole conversation echoed back.
    response_content = client(
        conversation,
        max_new_tokens=1024,
        do_sample=True,
        return_full_text=False,
    )[0]["generated_text"]

    yield response_content

# Gradio interface
with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.ChatInterface(
        chat_groq,
        # These kwargs exist in Gradio 4.x; Gradio 5 removed them, so drop
        # them (or pin gradio==4.*) if this call raises a TypeError.
        clear_btn=None,
        undo_btn=None,
        retry_btn=None,
    )

demo.queue()
demo.launch()
```
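The "Runtime error" itself is most likely the model load rather than anything in the chat logic: a 34B-parameter checkpoint weighs roughly 68 GB in float16, far beyond what a free CPU Space can hold. Below is a minimal sketch of a 4-bit load with bitsandbytes, assuming the Space is upgraded to GPU hardware (even in 4-bit the model needs about 20 GB of VRAM, so a 24 GB GPU at minimum) and that `bitsandbytes` is listed in `requirements.txt`:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "Hastika/codellama-CodeLlama-34b-Instruct-hf"

# Quantize the weights to 4-bit on load; compute still runs in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # place layers on the available GPU(s)
)
```

If no GPU is available, the realistic options are a much smaller model (e.g. a 7B variant) or not hosting the weights in the Space at all, as sketched further below.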
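Separately, the hand-rolled `"role: content"` prompt string does not match the `[INST]`-style format the CodeLlama-Instruct models were trained on, so replies may ramble or echo the role labels. A minimal sketch using the tokenizer's chat template instead, assuming this checkpoint ships one (the upstream CodeLlama-Instruct tokenizers do); it drops in for the `"\n".join(...)` and generation lines inside `chat_groq`:

```python
# Let the tokenizer render `messages` in the model's native chat format.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return a prompt string, not token IDs
    add_generation_prompt=True,  # end where the assistant's turn begins
)
response_content = client(
    prompt,
    max_new_tokens=1024,
    do_sample=True,
    return_full_text=False,
)[0]["generated_text"]
```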
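Finally, an alternative that sidesteps the memory problem entirely: call the model through the hosted Inference API with `huggingface_hub.InferenceClient`, so the Space never loads weights locally. This is a sketch, not a drop-in fix: it assumes an `HF_TOKEN` secret is configured on the Space and that the chosen checkpoint is actually served by an inference provider, which must be verified per model (a personal fork typically is not, which is why the upstream `codellama/CodeLlama-34b-Instruct-hf` appears here).

```python
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Assumption: this upstream checkpoint is reachable via the Inference API.
client = InferenceClient(
    model="codellama/CodeLlama-34b-Instruct-hf",
    token=os.environ.get("HF_TOKEN"),
)

system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. You reply with efficient answers.",
}

def chat_remote(message, history):
    # Rebuild the message list in the same shape as the original app.
    messages = [system_prompt]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": str(user_msg)})
        messages.append({"role": "assistant", "content": str(assistant_msg)})
    messages.append({"role": "user", "content": str(message)})

    # chat_completion formats the roles server-side; no manual prompt string.
    response = client.chat_completion(messages, max_tokens=1024)
    return response.choices[0].message.content

gr.ChatInterface(chat_remote).launch()
```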