| | import subprocess |
| | import sys |
| |
|
| | |
def install(package):
    """Install *package* into the current interpreter's environment via pip.

    Raises subprocess.CalledProcessError if pip exits non-zero.
    """
    cmd = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(cmd)
| |
|
| | |
# Pinned runtime dependencies installed at startup (pip requirement strings).
packages = [
    "transformers==4.34.0",
    # NOTE(review): "+cu118" local-version wheels are hosted on the PyTorch
    # index, not PyPI — presumably this host has that index configured;
    # otherwise pip needs --index-url https://download.pytorch.org/whl/cu118.
    # TODO confirm.
    "torch==2.0.1+cu118",
    "gradio==3.39.0",
    "accelerate==0.23.0",
    "bitsandbytes==0.41.1",
    "sentencepiece==0.1.99",
    "python-dotenv==1.0.0"
]
| |
|
# Best-effort bootstrap: report each failed install but keep going so a
# single bad pin does not abort the whole startup.
for package in packages:
    try:
        install(package)
    except Exception as e:  # broad on purpose: any failure is non-fatal here
        print(f"Failed to install {package}: {e}")
| |
|
| | |
# Fallback: if bitsandbytes still is not importable, retry from the test
# PyPI index.
try:
    import bitsandbytes  # noqa: F401  (import is the availability probe)
except ImportError:
    # Bug fix: the previous code passed "bitsandbytes -i <url>" to pip as a
    # SINGLE argument, which pip rejects as an invalid requirement string.
    # Each flag must be its own argv element.
    subprocess.check_call([
        sys.executable, "-m", "pip", "install",
        "bitsandbytes", "-i", "https://test.pypi.org/simple/",
    ])
| |
|
import os

import gradio as gr
import torch
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    LlamaTokenizer,
)
| |
|
| | |
# Read secrets from .env, then authenticate against the Hugging Face Hub.
# Fail fast when the token is absent — the gated model download would fail
# later anyway with a less obvious error.
load_dotenv()

token = os.getenv("HUGGINGFACE_TOKEN")
if not token:
    raise ValueError("HUGGINGFACE_TOKEN is missing in .env file!")
login(token=token)
| |
|
| | |
# Hugging Face model id to serve.
model_name = "mistralai/Mistral-7B-v0.3"

# Bug fix: Mistral-7B-v0.3 ships a v3 tokenizer, so the hard-coded
# LlamaTokenizer class cannot be assumed to load it correctly.
# AutoTokenizer resolves the right tokenizer class from the repo config.
tokenizer = AutoTokenizer.from_pretrained(model_name)
| |
|
| | |
# Choose device placement. Bug fix: the previous version used `assert` for
# this environment check — assertions are stripped under `python -O`, which
# would silently skip the CUDA probe. A plain conditional always runs.
if torch.cuda.is_available():
    device_map = "auto"  # let accelerate shard the model across GPUs
else:
    print("CUDA is not available. Install CUDA or use CPU mode.")
    print("Falling back to CPU mode.")
    device_map = "cpu"
| |
|
| | |
# 4-bit NF4 quantization settings. Bug fix: bitsandbytes' 4-bit kernels
# require CUDA, so the config is built only when GPU placement was selected;
# on the CPU fallback the model loads unquantized (from_pretrained accepts
# quantization_config=None).
if device_map == "auto":
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
else:
    bnb_config = None
| |
|
| | |
# Load the causal LM with the quantization/device settings chosen above.
_load_kwargs = {
    "quantization_config": bnb_config,
    "device_map": device_map,
    "torch_dtype": torch.float16,
}
model = AutoModelForCausalLM.from_pretrained(model_name, **_load_kwargs)
| |
|
def respond(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: The user's latest message (str).
        history: Prior [user, assistant] turn pairs supplied by
            gr.ChatInterface. Assumed to be pairs of strings — TODO confirm
            against the installed gradio version's history format.

    Returns:
        The newly generated assistant text (str).
    """
    # Bug fix: the previous version ignored `history`, so the bot had no
    # conversational memory. Rebuild the full transcript as the prompt.
    turns = [f"User: {u}\nAssistant: {a}" for u, a in history]
    turns.append(f"User: {message}\nAssistant:")
    prompt = "\n".join(turns)

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        return_attention_mask=True,
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
    )

    # Decode only the newly generated tokens instead of splitting the full
    # decode on "Assistant:", which misbehaves whenever the model itself
    # emits that marker in its reply.
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
| |
|
| | |
# Build the chat UI and serve it publicly. UI strings are Albanian, as
# deployed. cache_examples=True pre-generates answers for the examples at
# startup; share=True opens a public gradio tunnel.
demo = gr.ChatInterface(
    fn=respond,
    title="Shërbimi i Konsumatorit",
    examples=[
        "Si mund të rivendos fjalëkalimin?",
        "A e keni në dispozicion këtë produkt?",
    ],
    cache_examples=True,
)
demo.launch(server_port=7860, share=True)
| |
|