Update README.md
Browse files
README.md
CHANGED
|
@@ -31,3 +31,84 @@ Install the necessary dependencies with:
|
|
| 31 |
```bash
|
| 32 |
pip install llama-cpp-python huggingface_hub
|
| 33 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
```bash
|
| 32 |
pip install llama-cpp-python huggingface_hub
|
| 33 |
```
|
| 34 |
+
Load the model and start an interactive chat with:
|
| 35 |
+
|
| 36 |
+
```python
|
| 37 |
+
import logging
import os
import time  # Used below to measure generation durations
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Set up logging; INFO keeps overhead low compared to DEBUG
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Download the GGUF model from the Hugging Face Hub (cached after the first run)
model_name = "Gargaz/GPT-2-gguf"
model_file = "llama3.1-Q4_K_M.gguf"
model_path = hf_hub_download(model_name, filename=model_file)

# Instantiate the model from the downloaded file
llm = Llama(
    model_path=model_path,
    n_ctx=16000,      # Context length to use
    n_threads=64,     # Number of CPU threads
    n_gpu_layers=32,  # Number of model layers to offload to GPU
)

# System instructions prepended to every user prompt.
# NOTE: each fragment ends with a space so the concatenated
# string reads as separate sentences (the original ran
# "flowing." straight into "don't speak").
system_instructions = (
    "You are a friendly conversational AI designed to respond clearly and concisely to user inquiries. "
    "Stay on topic by answering questions directly, use a warm tone and acknowledge gratitude, ask for "
    "clarification on vague questions, provide brief and helpful recommendations, and encourage users "
    "to ask more questions to keep the conversation flowing. "
    "Don't speak alone; always respond just to the user input."
)
|
| 68 |
+
|
| 69 |
+
def chat():
    """Run an interactive chat session with the model.

    Reads user input from stdin, prepends the module-level
    ``system_instructions``, queries the global ``llm`` model, and prints
    the reply. Type 'exit' to quit the loop.
    """
    print("Introduceti 'exit' pentru a iesi din chat.")
    while True:
        user_input = input("Tu: ")
        if user_input.lower() == 'exit':
            print("Iesire din chat.")
            break

        # Prepare the prompt: system instructions + the user's turn.
        full_prompt = f"{system_instructions}\nUser: {user_input}\nAI:"

        # Cap the response length for faster inference.
        # (Original comment claimed 500 tokens; the actual cap is 40.)
        generation_kwargs = {
            "max_tokens": 40,   # Small cap keeps replies fast
            "stop": ["AI:"],    # Stop if the model starts a new "AI:" turn
            "echo": False,      # Do not repeat the prompt in the output
        }

        try:
            # Measure wall-clock time of the full generation call.
            gen_start_time = time.time()
            res = llm(full_prompt, **generation_kwargs)  # res is a dict
            gen_time_ms = (time.time() - gen_start_time) * 1000  # in ms

            # Log generation time (message mimics llama.cpp's perf output).
            logger.info(
                "llama_perf_context_print: load time = %.2f ms", gen_time_ms
            )

            generated_text = res["choices"][0]["text"].strip()
            print(f"AI: {generated_text}")

            # Rough prompt size: whitespace-split words, NOT model tokens.
            num_tokens = len(full_prompt.split())
            logger.info(
                "llama_perf_context_print: prompt eval time = %.2f ms / %d tokens",
                gen_time_ms,
                num_tokens,
            )

        except Exception as e:
            # Broad catch is acceptable at this top-level REPL boundary:
            # log the failure and keep the chat loop alive.
            logger.error(f"Error generating response: {e}")
            print("Eroare la generarea răspunsului.")
|
| 109 |
+
|
| 110 |
+
# Start the interactive chat only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    chat()
|
| 112 |
+
|
| 113 |
+
```
|
| 114 |
+
|