import gradio as gr
from ctransformers import AutoModelForCausalLM

# Define the model repository and file
MODEL_REPO = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
MODEL_FILE = "openhermes-2-mistral-7b.Q8_0.gguf"  # Q8_0 is near-lossless; a smaller quant (e.g. Q4_K_M) is faster on CPU

# Download and load the model
print(f"Downloading {MODEL_FILE} from {MODEL_REPO}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    model_file=MODEL_FILE,
    model_type="mistral",
    # gpu_layers=50,  # uncomment to offload layers to the GPU (requires a CUDA build of ctransformers)
    context_length=256,  # small context window keeps CPU responses fast
)
print("Model loaded successfully.")

# Function to generate responses
def chat_with_model(prompt):
    # Cap generation so the prompt plus the reply fit in the 256-token context
    response = model(prompt, max_new_tokens=200)
    return response
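
# A streaming variant, sketched under the assumption that ctransformers'
# stream=True mode is used; it yields text pieces as they are generated.
# Wiring it into the UI would also need a streaming-capable Gradio component.
def chat_with_model_streaming(prompt):
    # Accumulate tokens as the model emits them instead of waiting for the full reply
    text = ""
    for token in model(prompt, max_new_tokens=200, stream=True):
        text += token
    return text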

# Gradio UI
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query..."),
    outputs="text",
    title="Mistral-7B Chatbot",
    description="Optimized chatbot using Mistral-7B GGUF with improved speed.",
)

# Run the Gradio app
if __name__ == "__main__":
    iface.launch(share=True)  # share=True creates a public link when run locally; Spaces ignores it
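
# Assuming this runs as a Hugging Face Space, a matching requirements.txt
# would list the two dependencies this script imports:
#
#     ctransformers
#     gradio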