Instructions to use AGofficial/MyName_RPG with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- llama-cpp-python
How to use AGofficial/MyName_RPG with llama-cpp-python:
# !pip install llama-cpp-python
from llama_cpp import Llama
llm = Llama.from_pretrained(
    repo_id="AGofficial/MyName_RPG",
    filename="llm/dolphin.gguf",
)
llm.create_chat_completion( messages = "No input example has been defined for this model task." )
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- llama.cpp
How to use AGofficial/MyName_RPG with llama.cpp:
Install (macOS, Linux)
curl -LsSf https://llama.app/install.sh | sh # Start a local OpenAI-compatible server with a web UI: llama serve -hf AGofficial/MyName_RPG # Run inference directly in the terminal: llama cli -hf AGofficial/MyName_RPG
Install from WinGet (Windows)
winget install llama.cpp # Start a local OpenAI-compatible server with a web UI: llama serve -hf AGofficial/MyName_RPG # Run inference directly in the terminal: llama cli -hf AGofficial/MyName_RPG
Use pre-built binary
# Download pre-built binary from: # https://github.com/ggerganov/llama.cpp/releases # Start a local OpenAI-compatible server with a web UI: ./llama-server -hf AGofficial/MyName_RPG # Run inference directly in the terminal: ./llama-cli -hf AGofficial/MyName_RPG
Build from source code
git clone https://github.com/ggerganov/llama.cpp.git cd llama.cpp cmake -B build cmake --build build -j --target llama-server llama-cli # Start a local OpenAI-compatible server with a web UI: ./build/bin/llama-server -hf AGofficial/MyName_RPG # Run inference directly in the terminal: ./build/bin/llama-cli -hf AGofficial/MyName_RPG
Use Docker
docker model run hf.co/AGofficial/MyName_RPG
- LM Studio
- Jan
- Ollama
How to use AGofficial/MyName_RPG with Ollama:
ollama run hf.co/AGofficial/MyName_RPG
- Unsloth Studio
How to use AGofficial/MyName_RPG with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh # Run unsloth studio unsloth studio -H 0.0.0.0 -p 8888 # Then open http://localhost:8888 in your browser # Search for AGofficial/MyName_RPG to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex # Run unsloth studio unsloth studio -H 0.0.0.0 -p 8888 # Then open http://localhost:8888 in your browser # Search for AGofficial/MyName_RPG to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required # Open https://huggingface.co/spaces/unsloth/studio in your browser # Search for AGofficial/MyName_RPG to start chatting
- Atomic Chat new
- Docker Model Runner
How to use AGofficial/MyName_RPG with Docker Model Runner:
docker model run hf.co/AGofficial/MyName_RPG
- Lemonade
How to use AGofficial/MyName_RPG with Lemonade:
Pull the model
# Download Lemonade from https://lemonade-server.ai/ lemonade pull AGofficial/MyName_RPG
Run and chat with the model
lemonade run user.MyName_RPG-{{QUANT_TAG}}
List all available models
lemonade list
| import os | |
| from pathlib import Path | |
| from typing import Iterable, Optional | |
| try: | |
| from llama_cpp import Llama | |
| except ImportError: | |
| Llama = None | |
# The GGUF weights are expected to sit next to this module.
MODEL_PATH = Path(__file__).parent / "dolphin.gguf"

# Process-wide model handle; populated by the first get_llm() call.
_llm = None


def get_llm():
    """Return the shared ``Llama`` instance, loading it on first use.

    Returns:
        The module-level model object; the same object on every call.

    Raises:
        RuntimeError: If ``llama_cpp`` could not be imported.
        FileNotFoundError: If no file exists at ``MODEL_PATH``.
    """
    global _llm

    if Llama is None:
        raise RuntimeError("llama-cpp-python is not installed. Install requirements.txt to enable local AI replies.")

    # Already loaded: hand back the cached instance.
    if _llm is not None:
        return _llm

    if not MODEL_PATH.exists():
        raise FileNotFoundError(f"Model not found at {MODEL_PATH}")

    print(f"Loading model from {MODEL_PATH}...")
    # Use every available core up to 8; fall back to 4 when the count
    # cannot be determined.
    thread_count = min(os.cpu_count() or 4, 8)
    _llm = Llama(
        model_path=str(MODEL_PATH),
        n_ctx=4096,
        n_threads=thread_count,
        n_batch=512,
        verbose=False,
    )
    return _llm
def ask(
    prompt: str,
    system_prompt: Optional[str] = None,
    history: Optional[Iterable[dict]] = None,
    temperature: float = 0.7,
    max_tokens: int = 512,
) -> str:
    """Send one prompt to the local model and return the complete reply.

    Args:
        prompt: The new user message.
        system_prompt: Text for the leading system message. ``None`` is
            sent as an empty system message.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts.
            Only ``user``/``assistant`` entries with non-blank content are
            forwarded; everything else is skipped.
        temperature: Sampling temperature passed to the model.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The reply text with surrounding whitespace removed, or ``""`` when
        the model returns no text content.

    Raises:
        RuntimeError, FileNotFoundError: Propagated from ``get_llm()``.
    """
    if system_prompt is None:
        system_prompt = ""
    messages = [{"role": "system", "content": system_prompt}]

    for item in history or ():
        role = item.get("role")
        raw = item.get("content")
        # A None content must not be stringified to the literal "None"
        # and sent to the model as if it were real text.
        content = "" if raw is None else str(raw).strip()
        if role in {"user", "assistant"} and content:
            messages.append({"role": role, "content": content})

    messages.append({"role": "user", "content": prompt})

    response = get_llm().create_chat_completion(
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # The message content may be None (no text produced); treat it as empty
    # instead of failing on None.strip().
    reply = response["choices"][0]["message"].get("content")
    return (reply or "").strip()
def stream_chat(
    messages: Iterable[dict],
    temperature: float = 0.7,
    max_tokens: int = 180,
) -> Iterable[str]:
    """Stream a chat completion, yielding text fragments as they arrive.

    This is a generator: message validation and the model call happen when
    iteration starts, not when the function is called.

    Args:
        messages: Chat turns as ``{"role": ..., "content": ...}`` dicts.
            Entries whose role is not ``system``/``user``/``assistant`` or
            whose content is missing, ``None`` or blank are dropped.
        temperature: Sampling temperature passed to the model.
        max_tokens: Upper bound on generated tokens.

    Yields:
        Each non-empty ``content`` fragment from the streamed chunks.

    Raises:
        ValueError: If no usable message remains after filtering.
        RuntimeError, FileNotFoundError: Propagated from ``get_llm()``.
    """
    allowed_roles = {"system", "user", "assistant"}
    clean_messages = []
    for item in messages:
        role = item.get("role")
        raw = item.get("content")
        # A None content must not be stringified to the literal "None"
        # and forwarded as if it were real text.
        content = "" if raw is None else str(raw).strip()
        if role in allowed_roles and content:
            clean_messages.append({"role": role, "content": content})

    if not clean_messages:
        raise ValueError("stream_chat requires at least one message.")

    stream = get_llm().create_chat_completion(
        messages=clean_messages,
        temperature=temperature,
        max_tokens=max_tokens,
        stream=True,
    )
    for chunk in stream:
        choices = chunk.get("choices") or []
        if not choices:
            continue
        # Chunks without a delta or without text in it are skipped.
        delta = choices[0].get("delta") or {}
        content = delta.get("content")
        if content:
            yield content
def stream_ask(
    prompt: str,
    system_prompt: Optional[str] = None,
    history: Optional[Iterable[dict]] = None,
    temperature: float = 0.7,
    max_tokens: int = 180,
) -> Iterable[str]:
    """Build a message list for one prompt and stream the model's reply.

    Args:
        prompt: The new user message.
        system_prompt: Text for the leading system message; ``None`` is
            replaced by an empty string.
        history: Earlier turns as ``{"role": ..., "content": ...}`` dicts.
            Only ``user``/``assistant`` entries with non-blank content are
            kept.
        temperature: Sampling temperature, forwarded to ``stream_chat``.
        max_tokens: Upper bound on generated tokens, forwarded likewise.

    Returns:
        The iterable returned by ``stream_chat`` for the assembled messages.
    """
    if system_prompt is None:
        system_prompt = ""
    messages = [{"role": "system", "content": system_prompt}]

    for item in history or ():
        role = item.get("role")
        raw = item.get("content")
        # A None content must not be stringified to the literal "None";
        # once it is a string, downstream filtering can no longer tell it
        # apart from real text.
        content = "" if raw is None else str(raw).strip()
        if role in {"user", "assistant"} and content:
            messages.append({"role": role, "content": content})

    messages.append({"role": "user", "content": prompt})
    return stream_chat(messages, temperature=temperature, max_tokens=max_tokens)
if __name__ == "__main__":
    # Minimal interactive console: read a line, send it to the model, print
    # the reply. Type "exit" or "quit" (any case) to leave.
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C end the session cleanly instead of with a
            # traceback; the newline keeps the shell prompt on its own line.
            print()
            break
        command = user_input.strip()
        if command.lower() in {"exit", "quit"}:
            break
        if not command:
            # Nothing typed: do not spend a model call on an empty prompt.
            continue
        reply = ask(user_input)
        print(f"Model: {reply}")