# Chatbot-i / app.py
# Author: ShabanEjupi — "Update app.py" (commit d9e9533, verified)
import subprocess
import sys
# Install required packages
def install(package, *pip_args):
    """Install *package* into the current interpreter's environment via pip.

    Args:
        package: Distribution name, optionally with a version specifier
            (e.g. ``"torch==2.0.1+cu118"``).
        *pip_args: Optional extra command-line arguments forwarded to pip
            as separate argv entries (e.g. ``"-i", "https://test.pypi.org/simple/"``).
            Backward-compatible addition: existing single-argument calls behave
            exactly as before.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # Use sys.executable so the package lands in the same environment that
    # is running this script, not whatever `pip` happens to be on PATH.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", package, *pip_args]
    )
# Install pinned dependencies at startup (Spaces-style bootstrap).
# Failures are reported but non-fatal so one bad pin doesn't kill the app.
packages = [
    "transformers==4.34.0",
    "torch==2.0.1+cu118",
    "gradio==3.39.0",
    "accelerate==0.23.0",
    "bitsandbytes==0.41.1",
    "sentencepiece==0.1.99",
    "python-dotenv==1.0.0"
]
for package in packages:
    try:
        install(package)
    except Exception as e:
        # Best-effort: log and continue; the import below is the real check.
        print(f"Failed to install {package}: {e}")

# Fall back to Test PyPI for bitsandbytes if the pinned install failed.
try:
    import bitsandbytes
except ImportError:
    # BUG FIX: the original called install("bitsandbytes -i https://...")
    # which passes the flags inside ONE argv element, so pip searches for a
    # package literally named "bitsandbytes -i ...". Arguments must be
    # separate list items for subprocess (shell=False semantics).
    subprocess.check_call([
        sys.executable, "-m", "pip", "install",
        "bitsandbytes", "-i", "https://test.pypi.org/simple/",
    ])
from transformers import LlamaTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import gradio as gr
from huggingface_hub import login
from dotenv import load_dotenv
import os
import torch
# Authenticate with the Hugging Face Hub.
# Reads HUGGINGFACE_TOKEN from the process environment (populated from a
# local .env file by python-dotenv when present).
load_dotenv()

huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
# Guard clause: fail fast with a clear message when no token is configured.
if not huggingface_token:
    raise ValueError("HUGGINGFACE_TOKEN is missing in .env file!")
login(token=huggingface_token)
# Model configuration
model_name = "mistralai/Mistral-7B-v0.3"

# Load tokenizer (Mistral ships a Llama-style SentencePiece tokenizer).
# NOTE(review): AutoTokenizer.from_pretrained would be the more robust
# choice here — confirm before changing.
tokenizer = LlamaTokenizer.from_pretrained(model_name)

# Select a device map for model placement.
# BUG FIX: the original used `assert torch.cuda.is_available()` for this
# check. `assert` statements are stripped under `python -O`, in which case
# no exception is raised and device_map silently stayed "auto" even on a
# CUDA-less machine. An explicit `if` is always evaluated.
if torch.cuda.is_available():
    device_map = "auto"  # let accelerate spread the model across GPUs
else:
    print("CUDA is not available. Install CUDA or use CPU mode.")
    print("Falling back to CPU mode.")
    device_map = "cpu"
# 4-bit NF4 quantization (bitsandbytes) to cut GPU memory for the 7B model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Change to load_in_8bit=True if needed
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the model with optimized settings.
# BUG FIX: bitsandbytes 4-bit quantization requires CUDA; on the CPU
# fallback path (device_map == "cpu") passing quantization_config makes
# from_pretrained raise at load time. Skip quantization on CPU and load
# plain float16 weights instead (slower, but it loads).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config if device_map == "auto" else None,
    device_map=device_map,
    torch_dtype=torch.float16,
)
def respond(message, history):
    """Produce one assistant reply for the Gradio chat interface.

    Args:
        message: The user's latest chat message.
        history: Prior (user, assistant) turns supplied by gr.ChatInterface;
            currently unused — each reply is generated from the latest
            message alone.

    Returns:
        The model's generated continuation after the "Assistant:" marker.
    """
    prompt = f"User: {message}\nAssistant:"
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        return_attention_mask=True,
    ).to(model.device)
    generated = model.generate(
        **encoded,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
    )
    # The decoded text echoes the prompt; keep only what follows the last
    # "Assistant:" marker.
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.split("Assistant:")[-1]
# Build the chat UI and serve it.
chat_ui = gr.ChatInterface(
    respond,
    title="Shërbimi i Konsumatorit",
    examples=["Si mund të rivendos fjalëkalimin?", "A e keni në dispozicion këtë produkt?"],
    cache_examples=True,
)
# share=True publishes a temporary public URL in addition to port 7860.
chat_ui.launch(server_port=7860, share=True)