MyName_RPG / llm /sample.py
AGofficial's picture
Upload 96 files
e95022e verified
Raw
History Blame Contribute Delete
3.47 kB
import os
from pathlib import Path
from typing import Iterable, Optional
try:
from llama_cpp import Llama
except ImportError:
Llama = None
# The GGUF weights file is expected to sit in the same directory as this module.
MODEL_PATH = Path(__file__).with_name("dolphin.gguf")

# Module-level cache for the loaded model; stays None until get_llm() fills it.
_llm = None
def get_llm():
    """Return the shared ``Llama`` instance, loading it on first use.

    The loaded model is stored in the module-level ``_llm`` so that later
    calls reuse it instead of loading the weights again.

    Raises:
        RuntimeError: If ``llama_cpp`` could not be imported (``Llama`` is None).
        FileNotFoundError: If no file exists at ``MODEL_PATH``.
    """
    global _llm

    if Llama is None:
        raise RuntimeError("llama-cpp-python is not installed. Install requirements.txt to enable local AI replies.")

    # Fast path: the model was already loaded by an earlier call.
    if _llm is not None:
        return _llm

    if not MODEL_PATH.exists():
        raise FileNotFoundError(f"Model not found at {MODEL_PATH}")

    print(f"Loading model from {MODEL_PATH}...")
    load_options = {
        "model_path": str(MODEL_PATH),
        "n_ctx": 4096,
        # Use the detected CPU count (4 if it cannot be detected), capped at 8.
        "n_threads": min(os.cpu_count() or 4, 8),
        "n_batch": 512,
        "verbose": False,
    }
    _llm = Llama(**load_options)
    return _llm
def ask(
    prompt: str,
    system_prompt: Optional[str] = None,
    history: Optional[Iterable[dict]] = None,
    temperature: float = 0.7,
    max_tokens: int = 512,
) -> str:
    """Send one prompt to the local model and return the complete reply.

    Args:
        prompt: The new user message; it is always appended last.
        system_prompt: Optional system instruction. When it is ``None`` or
            blank, no system message is sent at all.
        history: Optional earlier turns as dicts with ``"role"`` and
            ``"content"`` keys. Only ``"user"`` / ``"assistant"`` turns with
            non-empty content (after stripping) are forwarded.
        temperature: Sampling temperature passed to the model.
        max_tokens: Upper bound on generated tokens passed to the model.

    Returns:
        The reply text with surrounding whitespace removed, or ``""`` when
        the response carries no content.

    Raises:
        RuntimeError: Propagated from ``get_llm()`` when llama-cpp-python is
            not installed.
        FileNotFoundError: Propagated from ``get_llm()`` when the model file
            is missing.
    """
    messages = []

    # Skip an empty system message instead of sending a blank one. The
    # streaming path (stream_ask -> stream_chat) already drops empty-content
    # messages, so this keeps both entry points building the same conversation.
    if system_prompt is not None and str(system_prompt).strip():
        messages.append({"role": "system", "content": system_prompt})

    for item in history or ():
        role = item.get("role")
        content = str(item.get("content", "")).strip()
        if role in {"user", "assistant"} and content:
            messages.append({"role": role, "content": content})

    messages.append({"role": "user", "content": prompt})

    response = get_llm().create_chat_completion(
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    # The message content may be None; treat that as an empty reply rather
    # than crashing on None.strip().
    reply = response["choices"][0]["message"].get("content")
    return (reply or "").strip()
def stream_chat(
    messages: Iterable[dict],
    temperature: float = 0.7,
    max_tokens: int = 180,
) -> Iterable[str]:
    """Stream the model's reply to a prepared conversation, piece by piece.

    This is a generator: nothing below runs (including input validation and
    model loading) until the caller starts iterating.

    Args:
        messages: Conversation turns as dicts with ``"role"`` and
            ``"content"`` keys. Turns whose role is not ``"system"``,
            ``"user"`` or ``"assistant"``, or whose stripped content is
            empty, are discarded.
        temperature: Sampling temperature passed to the model.
        max_tokens: Upper bound on generated tokens passed to the model.

    Yields:
        Each non-empty text fragment found in the streamed chunks.

    Raises:
        ValueError: If no message survives the filtering step.
    """
    accepted_roles = {"system", "user", "assistant"}

    sanitized = []
    for entry in messages:
        speaker = entry.get("role")
        text = str(entry.get("content", "")).strip()
        if speaker not in accepted_roles or not text:
            continue
        sanitized.append({"role": speaker, "content": text})

    if not sanitized:
        raise ValueError("stream_chat requires at least one message.")

    chunk_stream = get_llm().create_chat_completion(
        messages=sanitized,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=True,
    )

    for piece in chunk_stream:
        candidates = piece.get("choices") or []
        if candidates:
            # A chunk may carry no delta or no text; only yield real text.
            fragment = (candidates[0].get("delta") or {}).get("content")
            if fragment:
                yield fragment
def stream_ask(
    prompt: str,
    system_prompt: Optional[str] = None,
    history: Optional[Iterable[dict]] = None,
    temperature: float = 0.7,
    max_tokens: int = 180,
) -> Iterable[str]:
    """Build a conversation around ``prompt`` and stream the model's reply.

    Args:
        prompt: The new user message; it is always appended last.
        system_prompt: Optional system instruction; ``None`` is treated as
            an empty string.
        history: Optional earlier turns as dicts with ``"role"`` and
            ``"content"`` keys. Only ``"user"`` / ``"assistant"`` turns with
            non-empty content (after stripping) are kept.
        temperature: Sampling temperature forwarded to ``stream_chat``.
        max_tokens: Token limit forwarded to ``stream_chat``.

    Returns:
        The iterable of text fragments produced by ``stream_chat``.
    """
    opening = {
        "role": "system",
        "content": "" if system_prompt is None else system_prompt,
    }
    conversation = [opening]

    for turn in history or ():
        speaker = turn.get("role")
        text = str(turn.get("content", "")).strip()
        if speaker not in {"user", "assistant"} or not text:
            continue
        conversation.append({"role": speaker, "content": text})

    conversation.append({"role": "user", "content": prompt})
    return stream_chat(
        conversation,
        temperature=temperature,
        max_tokens=max_tokens,
    )
if __name__ == "__main__":
    # Minimal interactive console: read a line, send it to the model, print
    # the reply. Type "exit" or "quit" (any case) to leave.
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session cleanly instead
            # of dumping a traceback.
            print()
            break

        command = user_input.strip().lower()
        if command in {"exit", "quit"}:
            break
        if not command:
            # Nothing was typed; do not spend a model call on a blank line.
            continue

        reply = ask(user_input)
        print(f"Model: {reply}")