Upload folder using huggingface_hub
- .env.example +33 -0
- .gitignore +197 -0
- Dockerfile +20 -0
- LICENSE +201 -0
- Makefile +119 -0
- Modelfile +21 -0
- Modelfile.template +41 -0
- README.md +388 -3
- app/main.py +124 -0
- convert_to_gguf.sh +164 -0
- create_custom_model.py +373 -0
- docker-compose.yml +76 -0
- example_training_data.json +32 -0
- model_requirements.txt +19 -0
- quick-start.sh +131 -0
- requirements.txt +3 -0
- test_converted_model.py +273 -0
.env.example
ADDED
@@ -0,0 +1,33 @@
# Ollama Configuration
OLLAMA_HOST=0.0.0.0
OLLAMA_PORT=11436

# Streamlit Configuration
STREAMLIT_SERVER_PORT=8501
STREAMLIT_SERVER_ADDRESS=0.0.0.0
STREAMLIT_SERVER_HEADLESS=true
STREAMLIT_BROWSER_GATHER_USAGE_STATS=false

# Application Settings
APP_DEBUG=false
APP_ENV=development
APP_SECRET_KEY=your-secret-key-here

# Model Configuration
DEFAULT_MODEL=mistral:7b-instruct
MODEL_TEMPERATURE=0.7
MAX_TOKENS=2000

# API Configuration (if needed)
# API_KEY=your-api-key-here
# API_BASE_URL=http://localhost:11434

# Database Configuration (if needed)
# DB_HOST=db
# DB_PORT=5432
# DB_NAME=llm_demo
# DB_USER=postgres
# DB_PASSWORD=your-db-password

# CORS Configuration (if needed)
# CORS_ORIGINS=http://localhost:3000,http://localhost:8501
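These values are meant to be copied into a `.env` file (the Makefile's `check-env` target expects one to exist). A minimal sketch, assuming the `python-dotenv` package, of how the application side could read them; the `load_config` helper is hypothetical and not part of this upload:

```python
# Hypothetical helper: read the settings defined in .env.example from a local .env file.
import os
from dotenv import load_dotenv  # assumption: pip install python-dotenv

def load_config() -> dict:
    load_dotenv()  # loads .env from the current working directory if present
    return {
        "ollama_host": os.getenv("OLLAMA_HOST", "0.0.0.0"),
        "ollama_port": int(os.getenv("OLLAMA_PORT", "11436")),
        "default_model": os.getenv("DEFAULT_MODEL", "mistral:7b-instruct"),
        "temperature": float(os.getenv("MODEL_TEMPERATURE", "0.7")),
        "max_tokens": int(os.getenv("MAX_TOKENS", "2000")),
    }

if __name__ == "__main__":
    print(load_config())
```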
.gitignore
ADDED
@@ -0,0 +1,197 @@
.idea
.env
venv
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore
Dockerfile
ADDED
@@ -0,0 +1,20 @@
FROM python:3.11-slim

# Minimal system dependencies
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies (only 3 packages!)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application
WORKDIR /app
COPY app/ .

# Streamlit port
EXPOSE 8501

# Entrypoint
CMD ["streamlit", "run", "main.py", "--server.address", "0.0.0.0", "--server.port", "8501"]
LICENSE
ADDED
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
Makefile
ADDED
@@ -0,0 +1,119 @@
.PHONY: help install build up down stop restart logs clean test lint format check-env open open-ui open-ollama

# Default target
help:
	@echo "\nLLM Demo - Available commands:\n"
	@echo "  make install        Install Python dependencies"
	@echo "  make build          Build Docker containers"
	@echo "  make up             Start all services in detached mode"
	@echo "  make down           Stop and remove all containers, networks, and volumes"
	@echo "  make stop           Stop all running containers"
	@echo "  make restart        Restart all services"
	@echo "  make logs           Follow container logs"
	@echo "  make logs-ollama    Follow Ollama container logs"
	@echo "  make logs-ui        Follow Streamlit UI logs"
	@echo "  make clean          Remove all containers, networks, and volumes"
	@echo "  make test           Run tests"
	@echo "  make lint           Run linter"
	@echo "  make format         Format code"
	@echo "  make shell-ollama   Open shell in Ollama container"
	@echo "  make shell-ui       Open shell in Streamlit UI container"
	@echo "  make open           Open all services in browser"
	@echo "  make open-ui        Open Streamlit UI in browser"
	@echo "  make open-ollama    Open Ollama API in browser"

# Check if .env file exists
check-env:
	@if [ ! -f .env ]; then \
		echo "Error: .env file not found. Please create one from .env.example"; \
		exit 1; \
	fi

# Install Python dependencies
install:
	@echo "Installing Python dependencies..."
	python -m pip install --upgrade pip
	pip install -r requirements.txt

# Build Docker containers
build: check-env
	@echo "Building Docker containers..."
	docker-compose build

# Start all services in detached mode
up: check-env
	@echo "Starting all services..."
	docker-compose up -d

# Stop and remove all containers, networks, and volumes
down:
	@echo "Stopping and removing all containers..."
	docker-compose down -v

# Stop and remove all containers, networks, and images
stop:
	@echo "Stopping and removing all containers, networks, and images..."
	docker-compose down --rmi all --volumes --remove-orphans
	@echo "Removing unused Docker resources..."
	docker system prune -a -f --volumes
	@echo "Removing all unused Docker networks..."
	docker network prune -f
	@echo "Removing all unused Docker volumes..."
	docker volume prune -f

# Restart all services
restart: stop up

# Follow container logs
logs:
	docker-compose logs -f

# Follow Ollama container logs
logs-ollama:
	docker-compose logs -f ollama

# Follow Streamlit UI logs
logs-ui:
	docker-compose logs -f streamlit-ui

# Alias for stop (for backward compatibility)
clean: stop

# Run tests
test:
	@echo "Running tests..."
	# Add your test command here
	# Example: python -m pytest tests/

# Lint code
lint:
	@echo "Running linter..."
	# Add your lint command here
	# Example: pylint app/

# Format code
format:
	@echo "Formatting code..."
	# Add your format command here
	# Example: black app/

# Open shell in Ollama container
shell-ollama:
	docker-compose exec ollama /bin/sh

# Open shell in Streamlit UI container
shell-ui:
	docker-compose exec streamlit-ui /bin/sh

# Open all services in browser
open: open-ui open-ollama

# Open Streamlit UI in browser
open-ui:
	@echo "Opening Streamlit UI..."
	@xdg-open http://localhost:8501 2>/dev/null || open http://localhost:8501 2>/dev/null || start http://localhost:8501 2>/dev/null || echo "Could not open the browser. Please open http://localhost:8501 manually"

# Open Ollama API in browser
open-ollama:
	@echo "Opening Ollama API..."
	@xdg-open http://localhost:11436 2>/dev/null || open http://localhost:11436 2>/dev/null || start http://localhost:11436 2>/dev/null || echo "Could not open the browser. Please open http://localhost:11436 manually"
Modelfile
ADDED
@@ -0,0 +1,21 @@
FROM ./my_custom_model.gguf

# Model parameters
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER top_k 40
PARAMETER num_ctx 2048

# System prompt (in Polish, matching the model's target audience)
SYSTEM "Jesteś pomocnym asystentem AI stworzonym specjalnie dla polskich użytkowników.
Odpowiadasz w języku polskim, jesteś precyzyjny i pomocny.
Specjalizujesz się w programowaniu, technologii i sztucznej inteligencji."

# Chat template for Mistral
TEMPLATE "<s>[INST] {{ if .System }}{{ .System }}{{ end }}{{ .Prompt }} [/INST] {{ .Response }}</s>"

# Generation settings
PARAMETER num_predict 256
PARAMETER stop "<s>"
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"
Modelfile.template
ADDED
@@ -0,0 +1,41 @@
# Modelfile for custom fine-tuned Mistral model
# Generated by convert_to_gguf.sh

FROM ./my_custom_model.gguf

# Model parameters optimized for RTX 3050
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER top_k 40
PARAMETER num_ctx 2048
PARAMETER num_predict 512
PARAMETER repeat_penalty 1.1
PARAMETER repeat_last_n 64

# Stop tokens for Mistral format
PARAMETER stop "<s>"
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"
PARAMETER stop "</s>"

# System prompt - customize this for your use case
SYSTEM """Jesteś pomocnym asystentem AI stworzonym przez fine-tuning modelu Mistral 7B.

Twoje specjalizacje:
- Programowanie w Pythonie
- Machine Learning i AI
- Docker i DevOps
- Wyjaśnianie technicznych konceptów

Odpowiadasz w języku polskim, jesteś precyzyjny i podajesz praktyczne przykłady.
Gdy nie znasz odpowiedzi, uczciwie to przyznajesz.
"""

# Chat template for Mistral Instruct format
TEMPLATE """<s>{{- if .System }}[INST] {{ .System }}

{{ .Prompt }} [/INST]{{ else }}[INST] {{ .Prompt }} [/INST]{{ end }} {{ .Response }}</s>"""

# Runtime metadata
PARAMETER num_thread 4
PARAMETER num_gpu_layers 20
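Both Modelfiles define a Mistral-style chat template and system prompt. Once a model has been created from one of them (for example `ollama create wronai -f Modelfile`, as described in the README below), it can be queried with the same `ollama` Python client the app uses. A minimal sketch, assuming Ollama is reachable on localhost:11434 and the model was named `wronai`; response field access follows the dict style used in `app/main.py` and may differ on newer client versions:

```python
# Minimal sketch: query a model created from the Modelfile above.
# Assumptions: Ollama runs on localhost:11434 and the model was created as "wronai".
import ollama

client = ollama.Client(host="http://localhost:11434")

response = client.chat(
    model="wronai",
    messages=[{"role": "user", "content": "Cześć! Kim jesteś?"}],
)
# Dict-style access, consistent with how app/main.py reads streaming chunks.
print(response["message"]["content"])
```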
README.md
CHANGED
@@ -1,3 +1,388 @@
# 🚀 Minimal LLM + Your Own Model - Complete Guide

## 🎯 **PART 1: Up and running in 2 minutes**

### Quick start (minimal solution)
```bash
# 1. Clone the files
git clone <your-repo>
cd minimal-llm

# 2. Start everything with a single command
chmod +x quick-start.sh
./quick-start.sh

# 3. Open your browser
# http://localhost:8501 - Streamlit UI
# http://localhost:11434 - Ollama API
```

### What happens under the hood?
- **Ollama** - downloads and runs Mistral 7B
- **Streamlit** - a simple chat interface
- **Docker** - everything runs in containers
- **Minimal dependencies** - only 3 Python packages!

## 📁 **Project structure (minimal)**
```
minimal-llm/
├── docker-compose.yml   # 1 file - the whole infrastructure
├── Dockerfile           # Minimal image
├── requirements.txt     # 3 packages
├── quick-start.sh       # 1 command = full setup
└── app/
    └── main.py          # 50 lines = a complete chat
```

## 🎯 **PART 2: Build your own LLM**

### Step 1: Environment setup
```bash
# Install the fine-tuning dependencies
pip install -r model_requirements.txt

# Log in to Hugging Face (needed for publishing)
huggingface-cli login
```

### Step 2: Data preparation
```bash
python create_custom_model.py
# Choose option 1: Create a sample dataset
```

Example training data:
```json
[
  {
    "instruction": "Jak nazywa się stolica Polski?",
    "input": "",
    "output": "Stolica Polski to Warszawa."
  },
  {
    "instruction": "Wyjaśnij czym jest sztuczna inteligencja",
    "input": "",
    "output": "Sztuczna inteligencja (AI) to dziedzina informatyki..."
  }
]
```

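As a sketch of how records in this format feed the fine-tuning step (the exact prompt layout used by `create_custom_model.py` is not shown here and may differ), each entry can be rendered as a Mistral-instruct training string:

```python
# Sketch: turn instruction/input/output records into Mistral-instruct training text.
# Illustrative only; create_custom_model.py may format prompts differently.
import json

def to_mistral_prompt(record: dict) -> str:
    """Build a single "<s>[INST] ... [/INST] ... </s>" training example."""
    user_part = f"{record['instruction']}\n{record.get('input', '')}".strip()
    return f"<s>[INST] {user_part} [/INST] {record['output']}</s>"

with open("example_training_data.json", encoding="utf-8") as f:
    dataset = json.load(f)

for sample in dataset[:2]:
    print(to_mistral_prompt(sample))
```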
### Step 3: Fine-tune the model
```bash
# Run fine-tuning (requires a GPU)
python create_custom_model.py
# Choose option 2: Fine-tune the model

# Or run the full pipeline
python create_custom_model.py
# Choose option 6: Full pipeline
```

**Optimizations for the RTX 3050** (see the sketch after this list):
- 4-bit quantization
- LoRA (Low-Rank Adaptation)
- Batch size = 1
- Gradient accumulation = 4
- Mixed precision (FP16)

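A sketch of how these settings fit together with `transformers`, `peft`, and `bitsandbytes`; the base checkpoint and LoRA hyperparameters below are illustrative assumptions, not necessarily what `create_custom_model.py` uses:

```python
# Sketch of the 4-bit + LoRA setup listed above (assumed values, not the project's exact config).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig, get_peft_model

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # 4-bit quantization
    bnb_4bit_compute_dtype=torch.float16,  # FP16 compute for the quantized weights
)

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",  # assumed base model
    quantization_config=bnb_config,
    device_map="auto",
)

lora_config = LoraConfig(r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM")
model = get_peft_model(model, lora_config)  # only the LoRA adapter weights are trained

training_args = TrainingArguments(
    output_dir="./fine_tuned_model",
    per_device_train_batch_size=1,   # batch size = 1
    gradient_accumulation_steps=4,   # gradient accumulation = 4
    fp16=True,                       # mixed precision
)
```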
### Step 4: Convert to GGUF
```bash
# Automatically generated script
./convert_to_gguf.sh
```

### Step 5: Create the model in Ollama
```bash
# Create the Modelfile
python create_custom_model.py  # choose option 4

# Create the model in Ollama
ollama create wronai -f Modelfile

# Run the model
ollama run wronai

# Test the model
ollama run wronai "Cześć! Kim jesteś?"
```

### Running the script
The `create_custom_model.py` script offers an interactive menu with the following options:

```bash
python create_custom_model.py
```

Available options:
1. Create a sample dataset
2. Fine-tune the model
3. Convert the model to GGUF format
4. Create a Modelfile for Ollama
5. Publish the model to Hugging Face
6. Run the full pipeline (1-5)

### Prerequisites
- Python 3.8+
- PyTorch with CUDA support (recommended)
- The libraries listed in `model_requirements.txt`
- A [Hugging Face](https://huggingface.co/) account (for publishing the model)

### Troubleshooting

#### Syntax errors in the script
If you hit a syntax error, make sure that:
1. You are using Python 3.8 or newer
2. All dependencies are installed
3. The file was not corrupted during download

#### Dependency problems
```bash
# Create and activate a virtual environment
python -m venv .venv
source .venv/bin/activate  # Linux/Mac
.venv\Scripts\activate     # Windows

# Install the dependencies
pip install -r model_requirements.txt
```

#### Missing permissions
If you run into permission problems, try:
```bash
# Make the shell scripts executable
chmod +x *.sh

# Run with administrator privileges (if needed)
sudo python create_custom_model.py
```

### Contributing
Bug reports and proposed changes are welcome via Issues and Pull Requests.

### Step 6: Publish the model

#### **Option A: Ollama Registry**
```bash
# Push to the Ollama Library
ollama push wronai

# Now anyone can use it:
ollama pull your-username/wronai
```

#### **Option B: Hugging Face Hub**
```bash
# Publish to HF
python publish_to_hf.py

# The model becomes available at:
# https://huggingface.co/your-username/my-custom-mistral-7b
```

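`publish_to_hf.py` is referenced above but is not part of this upload, so the following is only a sketch of what such a script could look like with the `huggingface_hub` client; the repo id and model directory are placeholders:

```python
# Hypothetical publish_to_hf.py: upload the fine-tuned weights to the Hugging Face Hub.
# Assumes you have already run `huggingface-cli login`; REPO_ID and MODEL_DIR are placeholders.
from huggingface_hub import HfApi

REPO_ID = "your-username/my-custom-mistral-7b"
MODEL_DIR = "./fine_tuned_model"

api = HfApi()
api.create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)
api.upload_folder(folder_path=MODEL_DIR, repo_id=REPO_ID, repo_type="model")
print(f"Uploaded {MODEL_DIR} to https://huggingface.co/{REPO_ID}")
```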
#### **Option C: Docker Registry**
```bash
# Package as a Docker image
docker build -t my-custom-llm .
docker tag my-custom-llm your-registry/my-custom-llm
docker push your-registry/my-custom-llm
```

## 🎯 **PART 3: Ready-made alternatives (zero code)**

### **1. Simplest - Ollama**
```bash
# Installation
curl -fsSL https://ollama.ai/install.sh | sh

# Run a model
ollama run mistral:7b-instruct

# The API is automatically available on localhost:11434
```

### **2. Hugging Face Inference API**
```python
import requests

headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}
response = requests.post(
    "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1",
    headers=headers,
    json={"inputs": "Hello!"}
)
```

### **3. Groq (ultra fast)**
```python
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_GROQ_KEY",
    base_url="https://api.groq.com/openai/v1"
)

response = client.chat.completions.create(
    model="mistral-7b-instruct",
    messages=[{"role": "user", "content": "Hello!"}]
)
```

### **4. Together.ai**
```python
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_TOGETHER_KEY",
    base_url="https://api.together.xyz/v1"
)

# Compatible with the OpenAI API
```

### **5. Modal.com (serverless GPU)**
```python
import modal

stub = modal.Stub("llm-api")

@stub.function(gpu="T4")
def generate(prompt: str):
    # Your model code
    return model.generate(prompt)

# Deploy with a single command
# modal deploy
```

## 🎯 **PART 4: Frontend options**

### **1. Streamlit (Python)**
```python
import streamlit as st

st.title("My LLM Chat")
prompt = st.text_input("Message:")
if st.button("Send"):
    response = generate(prompt)
    st.write(response)
```

### **2. Gradio (Python)**
```python
import gradio as gr

def chat(message, history):
    response = generate(message)
    history.append([message, response])
    return "", history

gr.ChatInterface(chat).launch()
```

### **3. Next.js + Vercel AI SDK**
```tsx
import { useChat } from 'ai/react'

export default function Chat() {
  const { messages, input, handleInputChange, handleSubmit } = useChat()

  return (
    <div>
      {messages.map(m => <div key={m.id}>{m.content}</div>)}
      <form onSubmit={handleSubmit}>
        <input value={input} onChange={handleInputChange} />
      </form>
    </div>
  )
}
```

## 🎯 **PART 5: Comparing the options**

| Solution | Setup time | Code | Hosting | GPU |
|----------|------------|------|---------|-----|
| **Ollama + Streamlit** | 2 min | 50 lines | Local/Docker | Optional |
| **Hugging Face API** | 30 sec | 5 lines | Cloud | No |
| **Groq API** | 1 min | 5 lines | Cloud | No |
| **Modal.com** | 5 min | 20 lines | Serverless | Auto |
| **Custom fine-tuning** | 2 hours | 200 lines | Self-hosted | Required |

## 🛠️ **Debugging & Tips**

### Common problems
```bash
# The model does not load
docker logs ollama-engine

# No GPU detected
docker run --rm --gpus all nvidia/cuda:11.8-base nvidia-smi

# Port already in use
sudo netstat -tlnp | grep 11434

# Restart everything
docker compose down && docker compose up -d
```

### RTX 3050 optimizations
```python
# During fine-tuning
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,          # 4-bit quantization
    torch_dtype=torch.float16   # Half precision
)

# Training args
TrainingArguments(
    per_device_train_batch_size=1,   # Small batch
    gradient_accumulation_steps=4,   # Gradient accumulation
    fp16=True                        # Mixed precision
)
```

### Resource monitoring
```bash
# GPU monitoring
watch -n 1 nvidia-smi

# Container resources
docker stats

# Model memory usage
docker exec -it ollama-engine ollama ps
```

## 🎯 **Next steps**

### For learning:
1. **Experiment with different models** - Llama, CodeLlama, Phi-3
2. **Try different fine-tuning techniques** - LoRA, QLoRA, full fine-tuning
3. **Add RAG** - Retrieval Augmented Generation
4. **Build a multi-agent system**

### For production:
1. **Move to a managed service** - Groq, Together.ai
2. **Set up monitoring** - LangSmith, Weights & Biases
3. **Add a cache** - Redis for responses
4. **Implement rate limiting**

### For business:
1. **Fine-tune on your own data**
2. **Set up A/B testing** across models
3. **Add a user feedback loop**
4. **Monetize the API**

## 🎉 **Summary**

**Pick an option to match your needs:**

- **Demo/learning**: Ollama + Streamlit (this solution)
- **Prototype**: Hugging Face API + Gradio
- **MVP**: Groq API + Next.js
- **Production**: Modal/RunPod + custom frontend
- **Enterprise**: fine-tuned model + your own infrastructure

**The minimal solution = 5 files, 50 lines of code, 2-minute setup!**
app/main.py
ADDED
@@ -0,0 +1,124 @@
#!/usr/bin/env python3
"""
A minimal LLM app in 50 lines!
Streamlit + Ollama = zero configuration
"""

import streamlit as st
import ollama
import os
from typing import Generator

# Configuration
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
MODEL_NAME = "mistral:7b-instruct"

# Set up the Ollama client
client = ollama.Client(host=OLLAMA_URL)

def stream_response(prompt: str) -> Generator[str, None, None]:
    """Generator for the streaming response"""
    try:
        stream = client.chat(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            stream=True
        )

        for chunk in stream:
            if chunk['message']['content']:
                yield chunk['message']['content']
    except Exception as e:
        yield f"Error: {str(e)}"

def main():
    # UI setup
    st.set_page_config(
        page_title="🤖 Minimal LLM Chat",
        page_icon="🤖",
        layout="wide"
    )

    st.title("🤖 Minimal LLM Chat")
    st.markdown("*Powered by Ollama + Mistral 7B*")

    # Sidebar with settings
    with st.sidebar:
        st.header("⚙️ Settings")

        # Model info
        try:
            models = client.list()
            available_models = [m['name'] for m in models['models']]
            st.success("✅ Connected to Ollama")
            st.info(f"Available models: {len(available_models)}")
        except Exception:
            st.error("❌ Cannot connect to Ollama")
            st.stop()

        # Parameters
        temperature = st.slider("Temperature", 0.0, 2.0, 0.7, 0.1)
        max_tokens = st.slider("Max tokens", 50, 1000, 500, 50)

        # System prompt
        system_prompt = st.text_area(
            "System prompt:",
            "You are a helpful AI assistant. Answer concisely and accurately.",
            height=100
        )

    # Chat interface
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Chat input
    if prompt := st.chat_input("Type your message here..."):
        # Add user message
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # Generate response
        with st.chat_message("assistant"):
            response_placeholder = st.empty()
            full_response = ""

            # Streaming response
            for chunk in stream_response(prompt):
                full_response += chunk
                response_placeholder.markdown(full_response + "▌")

            response_placeholder.markdown(full_response)

        # Add assistant message
        st.session_state.messages.append({"role": "assistant", "content": full_response})

    # Quick actions
    col1, col2, col3 = st.columns(3)

    with col1:
        if st.button("🗑️ Clear Chat"):
            st.session_state.messages = []
            st.rerun()

    with col2:
        if st.button("💡 Example Question"):
            example = "Explain quantum computing in simple terms"
            st.session_state.messages.append({"role": "user", "content": example})
            st.rerun()

    with col3:
        if st.button("📊 Model Info"):
            try:
                info = client.show(MODEL_NAME)
                st.json(info)
            except Exception:
                st.error("Cannot get model info")

if __name__ == "__main__":
    main()
convert_to_gguf.sh
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/bin/bash

# 🔄 Convert fine-tuned model to GGUF format for Ollama
# This script converts your custom fine-tuned model to GGUF format

set -e

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${BLUE}🔄 Converting Model to GGUF Format${NC}"
echo "====================================="

# Configuration
MODEL_DIR="./fine_tuned_model"
OUTPUT_FILE="my_custom_model.gguf"
LLAMA_CPP_DIR="./llama.cpp"

# Check if fine-tuned model exists
if [ ! -d "$MODEL_DIR" ]; then
    echo -e "${RED}❌ Fine-tuned model not found at: $MODEL_DIR${NC}"
    echo "Run fine-tuning first: python create_custom_model.py (option 2)"
    exit 1
fi

echo -e "${GREEN}✅ Found fine-tuned model at: $MODEL_DIR${NC}"

# Check if llama.cpp exists, if not clone it
if [ ! -d "$LLAMA_CPP_DIR" ]; then
    echo -e "${YELLOW}📥 Cloning llama.cpp...${NC}"
    git clone https://github.com/ggerganov/llama.cpp.git

    echo -e "${YELLOW}🔨 Building llama.cpp...${NC}"
    cd llama.cpp

    # Build with CUDA support if available
    if command -v nvcc &> /dev/null; then
        echo -e "${GREEN}🚀 NVIDIA CUDA detected, building with GPU support${NC}"
        make LLAMA_CUBLAS=1 -j$(nproc)
    else
        echo -e "${YELLOW}⚠️ No CUDA detected, building CPU-only version${NC}"
        make -j$(nproc)
    fi

    cd ..
else
    echo -e "${GREEN}✅ llama.cpp already exists${NC}"
fi

# Check required Python dependencies
echo -e "${BLUE}📦 Checking Python dependencies...${NC}"
python3 -c "import torch, transformers, sentencepiece" 2>/dev/null || {
    echo -e "${YELLOW}⚠️ Installing missing dependencies...${NC}"
    pip install torch transformers sentencepiece protobuf
}

# Convert model to GGUF
echo -e "${BLUE}🔄 Converting to GGUF format...${NC}"
echo "This may take several minutes..."

# Method 1: Direct conversion (recommended)
if [ -f "$LLAMA_CPP_DIR/convert.py" ]; then
    echo -e "${GREEN}Using convert.py${NC}"
    python3 "$LLAMA_CPP_DIR/convert.py" \
        "$MODEL_DIR" \
        --outtype f16 \
        --outfile "$OUTPUT_FILE"
else
    # Method 2: Convert via HF format (fallback)
    echo -e "${YELLOW}Using alternative conversion method${NC}"
    python3 -c "
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import sys
import os

print('Loading model...')
model = AutoModelForCausalLM.from_pretrained('$MODEL_DIR', torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained('$MODEL_DIR')

print('Saving in HF format...')
model.save_pretrained('./temp_hf_model', safe_serialization=True)
tokenizer.save_pretrained('./temp_hf_model')
print('Conversion to HF format complete')
"

    # Then convert HF to GGUF
    if [ -d "./temp_hf_model" ]; then
        python3 "$LLAMA_CPP_DIR/convert.py" \
            "./temp_hf_model" \
            --outtype f16 \
            --outfile "$OUTPUT_FILE"
        rm -rf ./temp_hf_model
    fi
fi

# Verify conversion
if [ -f "$OUTPUT_FILE" ]; then
    FILE_SIZE=$(du -h "$OUTPUT_FILE" | cut -f1)
    echo
    echo -e "${GREEN}🎉 Conversion successful!${NC}"
    echo -e "${BLUE}📄 Output file: $OUTPUT_FILE${NC}"
    echo -e "${BLUE}📊 File size: $FILE_SIZE${NC}"

    # Optional: Quantize to smaller sizes
    echo
    echo -e "${YELLOW}💡 Optional: Create quantized versions?${NC}"
    read -p "Create Q4_K_M quantized version? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo -e "${BLUE}🔄 Creating Q4_K_M quantized version...${NC}"
        "$LLAMA_CPP_DIR/quantize" "$OUTPUT_FILE" "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" Q4_K_M

        if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then
            QUANT_SIZE=$(du -h "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" | cut -f1)
            echo -e "${GREEN}✅ Quantized version created: ${OUTPUT_FILE%.gguf}_q4_k_m.gguf ($QUANT_SIZE)${NC}"
        fi
    fi

    # Test the converted model
    echo
    echo -e "${YELLOW}🧪 Test the converted model?${NC}"
    read -p "Run a quick test? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        echo -e "${BLUE}🧪 Testing model...${NC}"
        echo "Prompt: 'Hello, how are you?'"
        echo "Response:"
        "$LLAMA_CPP_DIR/main" -m "$OUTPUT_FILE" -p "Hello, how are you?" -n 50 --temp 0.7
    fi

else
    echo -e "${RED}❌ Conversion failed!${NC}"
    echo "Check the error messages above."
    exit 1
fi

# Instructions for next steps
echo
echo -e "${GREEN}🎯 Next Steps:${NC}"
echo "1. Create Ollama Modelfile:"
echo "   python create_custom_model.py  # option 4"
echo
echo "2. Import to Ollama:"
echo "   ollama create my-custom-model -f Modelfile"
echo
echo "3. Test in Ollama:"
echo "   ollama run my-custom-model \"Hello!\""
echo
echo "4. Push to Ollama Library:"
echo "   ollama push my-custom-model"
echo
echo -e "${BLUE}📚 Files created:${NC}"
echo "  • $OUTPUT_FILE (F16 version)"
if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then
    echo "  • ${OUTPUT_FILE%.gguf}_q4_k_m.gguf (Quantized version)"
fi

echo
echo -e "${GREEN}🎉 GGUF conversion completed successfully!${NC}"
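Note: the script above assumes the older llama.cpp entry points (`convert.py`, `quantize`, `main`). Recent llama.cpp checkouts may ship differently named tools (for example a `convert_hf_to_gguf.py` script and `llama-quantize`/`llama-cli` binaries); the exact names depend on the checkout, so treat the following as a minimal sketch rather than part of this repo:

```python
# Hedged sketch: pick whichever converter script the local llama.cpp checkout ships.
# The alternative file names are assumptions about other llama.cpp versions.
import os
import subprocess

def find_converter(llama_cpp_dir="./llama.cpp"):
    for name in ("convert.py", "convert-hf-to-gguf.py", "convert_hf_to_gguf.py"):
        path = os.path.join(llama_cpp_dir, name)
        if os.path.exists(path):
            return path
    raise FileNotFoundError("No GGUF converter script found in ./llama.cpp")

if __name__ == "__main__":
    converter = find_converter()
    subprocess.run(
        ["python3", converter, "./fine_tuned_model",
         "--outtype", "f16", "--outfile", "my_custom_model.gguf"],
        check=True,
    )
```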
create_custom_model.py
ADDED
@@ -0,0 +1,373 @@
#!/usr/bin/env python3
"""
🎯 Creating your own LLM - from fine-tuning to publication
An example with Mistral 7B + your own data
"""

import os
import json
from pathlib import Path
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model, TaskType
import torch

# === 1. DATA PREPARATION ===

def create_sample_dataset():
    """Creates a sample dataset for fine-tuning"""

    # Sample data (in Polish) - replace with your own!
    sample_data = [
        {
            "instruction": "Jak nazywa się stolica Polski?",
            "input": "",
            "output": "Stolica Polski to Warszawa."
        },
        {
            "instruction": "Wyjaśnij czym jest sztuczna inteligencja",
            "input": "",
            "output": "Sztuczna inteligencja (AI) to dziedzina informatyki zajmująca się tworzeniem systemów zdolnych do wykonywania zadań wymagających inteligencji."
        },
        {
            "instruction": "Napisz krótką funkcję w Pythonie",
            "input": "funkcja do obliczania silni",
            "output": "```python\ndef factorial(n):\n    if n <= 1:\n        return 1\n    return n * factorial(n-1)\n```"
        },
        {
            "instruction": "Jak zoptymalizować kod Python?",
            "input": "",
            "output": "Główne sposoby optymalizacji kodu Python:\n1. Używaj wbudowanych funkcji\n2. Unikaj pętli, preferuj list comprehensions\n3. Używaj numpy dla operacji numerycznych\n4. Profiluj kod przed optymalizacją"
        },
        {
            "instruction": "Co to jest Docker?",
            "input": "",
            "output": "Docker to platforma konteneryzacji umożliwiająca pakowanie aplikacji wraz z zależnościami w lekkie, przenośne kontenery."
        }
    ]

    # Save the dataset
    os.makedirs("data", exist_ok=True)
    with open("data/training_data.json", "w", encoding="utf-8") as f:
        json.dump(sample_data, f, indent=2, ensure_ascii=False)

    print("✅ Sample dataset created in data/training_data.json")
    return sample_data

def format_training_data(examples):
    """Formats the data for Mistral Instruct"""
    formatted_texts = []

    for example in examples:
        if example.get("input"):
            prompt = f"<s>[INST] {example['instruction']}\n{example['input']} [/INST] {example['output']}</s>"
        else:
            prompt = f"<s>[INST] {example['instruction']} [/INST] {example['output']}</s>"
        formatted_texts.append(prompt)

    return {"text": formatted_texts}

# === 2. FINE-TUNING WITH LORA ===

def setup_model_and_tokenizer(model_name="mistralai/Mistral-7B-Instruct-v0.1"):
    """Loads the model and tokenizer"""
    print(f"📥 Loading model: {model_name}")

    # Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Model with quantization for the RTX 3050
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        load_in_4bit=True,  # 4-bit quantization
        trust_remote_code=True
    )

    return model, tokenizer

def setup_lora_config():
    """LoRA configuration for efficient fine-tuning"""
    return LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=16,  # LoRA rank
        lora_alpha=32,
        lora_dropout=0.1,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]  # Mistral attention modules
    )

def fine_tune_model():
    """Main fine-tuning function"""

    # 1. Prepare the data
    print("🔄 Preparing training data...")
    sample_data = create_sample_dataset()

    # 2. Load the model
    model, tokenizer = setup_model_and_tokenizer()

    # 3. Setup LoRA
    lora_config = setup_lora_config()
    model = get_peft_model(model, lora_config)

    print("📊 Trainable parameters:")
    model.print_trainable_parameters()  # prints the summary itself (returns None)

    # 4. Prepare the dataset
    dataset = Dataset.from_list(sample_data)
    formatted_dataset = dataset.map(
        lambda x: format_training_data([x]),
        remove_columns=dataset.column_names
    )

    # Tokenization
    def tokenize_function(examples):
        return tokenizer(
            examples["text"],
            truncation=True,
            padding="max_length",
            max_length=512,
            return_tensors="pt"
        )

    tokenized_dataset = formatted_dataset.map(tokenize_function, batched=True)

    # 5. Training arguments - tuned for the RTX 3050
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=3,
        per_device_train_batch_size=1,  # Small batch size for the RTX 3050
        gradient_accumulation_steps=4,
        warmup_steps=10,
        learning_rate=2e-4,
        fp16=True,  # Mixed precision
        logging_steps=1,
        save_strategy="epoch",
        evaluation_strategy="no",
        dataloader_num_workers=0,  # Avoid multiprocessing issues
        remove_unused_columns=False,
    )

    # 6. Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        tokenizer=tokenizer,
    )

    # 7. Train!
    print("🚀 Starting fine-tuning...")
    trainer.train()

    # 8. Save model
    model.save_pretrained("./fine_tuned_model")
    tokenizer.save_pretrained("./fine_tuned_model")

    print("✅ Fine-tuning completed! Model saved to ./fine_tuned_model")

    return model, tokenizer

# === 3. CONVERSION TO GGUF ===

def convert_to_gguf():
    """Converts the model to GGUF format for Ollama"""

    print("🔄 Converting to GGUF format...")

    # This step requires llama.cpp
    conversion_script = """
#!/bin/bash

# Download llama.cpp if it is not present
if [ ! -d "llama.cpp" ]; then
    git clone https://github.com/ggerganov/llama.cpp.git
    cd llama.cpp
    make -j
    cd ..
fi

# Convert the model
python llama.cpp/convert.py ./fine_tuned_model --outtype f16 --outfile my_custom_model.gguf

echo "✅ GGUF conversion completed: my_custom_model.gguf"
"""

    with open("convert_to_gguf.sh", "w") as f:
        f.write(conversion_script)

    os.chmod("convert_to_gguf.sh", 0o755)

    print("📝 Created convert_to_gguf.sh script")
    print("Run: ./convert_to_gguf.sh")

# === 4. CREATING THE MODELFILE FOR OLLAMA ===

def create_ollama_modelfile():
    """Creates a Modelfile for Ollama"""

    modelfile_content = '''FROM ./my_custom_model.gguf

# Model metadata
PARAMETER temperature 0.7
PARAMETER top_p 0.9
PARAMETER top_k 40
PARAMETER num_ctx 2048

# System prompt
SYSTEM "Jesteś pomocnym asystentem AI stworzonym specjalnie dla polskich użytkowników.\nOdpowiadasz w języku polskim, jesteś precyzyjny i pomocny.\nSpecjalizujesz się w programowaniu, technologii i sztucznej inteligencji."

# Chat template for Mistral
TEMPLATE "<s>[INST] {{ if .System }}{{ .System }}{{ end }}{{ .Prompt }} [/INST] {{ .Response }}</s>"

# Metadata
PARAMETER num_predict 256
PARAMETER stop "<s>"
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"
'''

    with open("Modelfile", "w", encoding="utf-8") as f:
        f.write(modelfile_content)
    print("✅ Created Modelfile for Ollama")

# === 5. PUBLISHING THE MODEL ===

def create_model_in_ollama():
    """Creates the model in Ollama"""

    ollama_commands = """
# 1. Create the model in Ollama
ollama create wronai -f Modelfile

# 2. Test the model
ollama run wronai "Cześć! Kim jesteś?"

# 3. Push to the Ollama Library (requires an account)
ollama push wronai

# 4. Alternatively - export to a file
ollama save wronai wronai-model.tar
"""

    with open("ollama_commands.sh", "w") as f:
        f.write(ollama_commands)

    print("✅ Created ollama_commands.sh")

# === 6. PUBLISHING TO HUGGING FACE ===

def create_hf_publish_script():
    """Script for publishing to Hugging Face"""

    hf_script = '''#!/usr/bin/env python3
"""
Publishing the model to the Hugging Face Hub
"""

from huggingface_hub import HfApi, create_repo
import os

def publish_to_hf():
    # Configuration
    model_name = "your-username/my-custom-mistral-7b"

    # Login (an HF token is required)
    # huggingface-cli login

    # Create the repo
    api = HfApi()

    try:
        create_repo(
            repo_id=model_name,
            repo_type="model",
            private=False  # Set True for a private repo
        )
        print(f"✅ Repository created: {model_name}")
    except Exception as e:
        print(f"Repository may already exist: {e}")

    # Upload the files
    api.upload_folder(
        folder_path="./fine_tuned_model",
        repo_id=model_name,
        commit_message="Initial model upload"
    )

    # Upload the GGUF (if it exists)
    if os.path.exists("my_custom_model.gguf"):
        api.upload_file(
            path_or_fileobj="my_custom_model.gguf",
            path_in_repo="my_custom_model.gguf",
            repo_id=model_name,
            commit_message="Add GGUF version"
        )

    print(f"🎉 Model published: https://huggingface.co/{model_name}")

if __name__ == "__main__":
    publish_to_hf()
'''

    with open("publish_to_hf.py", "w") as f:
        f.write(hf_script)

    print("✅ Created publish_to_hf.py")

# === MAIN FUNCTION ===

def main():
    """Full pipeline for creating your own model"""

    print("🎯 Custom LLM Creation Pipeline")
    print("===============================")

    choice = input("""
Choose an option:
1. Create a sample dataset
2. Fine-tune the model (requires a GPU)
3. Convert to GGUF
4. Create a Modelfile for Ollama
5. Prepare the publishing scripts
6. Full pipeline (1-5)

Choice (1-6): """).strip()

    if choice == "1":
        create_sample_dataset()
    elif choice == "2":
        fine_tune_model()
    elif choice == "3":
        convert_to_gguf()
    elif choice == "4":
        create_ollama_modelfile()
    elif choice == "5":
        create_hf_publish_script()
    elif choice == "6":
        print("🚀 Running full pipeline...")
        create_sample_dataset()

        if input("Continue with fine-tuning? (y/N): ").lower() == 'y':
            fine_tune_model()
            convert_to_gguf()

        create_ollama_modelfile()
        create_model_in_ollama()
        create_hf_publish_script()

        print("✅ Full pipeline completed!")
    else:
        print("Invalid choice")

if __name__ == "__main__":
    main()
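Note: the Trainer in fine_tune_model() receives tokenized input_ids but no labels column, so depending on the transformers version it may not compute a loss. A common remedy is a causal-LM data collator that copies input_ids into labels; a minimal sketch under that assumption (reusing the names produced in fine_tune_model(), which are illustrative here, not part of the uploaded script):

```python
# Hedged sketch: supply labels for causal-LM training via a data collator.
# Assumes `model`, `tokenizer`, `training_args` and `tokenized_dataset`
# already exist as built in fine_tune_model().
from transformers import DataCollatorForLanguageModeling, Trainer

collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)  # labels = input_ids

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=collator,
)
trainer.train()
```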
docker-compose.yml
ADDED
@@ -0,0 +1,76 @@
version: '3.8'

# Load environment variables from .env file
x-env: &env
  env_file: .env
  environment:
    - OLLAMA_PORT=${OLLAMA_PORT:-11434}
    - OLLAMA_HOST=${OLLAMA_HOST:-0.0.0.0}
    - STREAMLIT_SERVER_PORT=${STREAMLIT_SERVER_PORT:-8501}
    - STREAMLIT_SERVER_ADDRESS=${STREAMLIT_SERVER_ADDRESS:-0.0.0.0}
    - DEFAULT_MODEL=${DEFAULT_MODEL:-mistral:7b-instruct}

services:
  # Ollama - LLM Engine (minimal configuration)
  ollama:
    image: ollama/ollama:latest
    container_name: ollama-engine
    <<: *env
    ports:
      - "${OLLAMA_PORT:-11434}:11434"
    volumes:
      - ollama_data:/root/.ollama
    # deploy:
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]
    restart: unless-stopped

  # Streamlit UI (Python frontend)
  streamlit-ui:
    build: .
    container_name: llm-ui
    <<: *env
    ports:
      - "${STREAMLIT_SERVER_PORT:-8501}:8501"
    depends_on:
      - ollama
    environment:
      - OLLAMA_URL=http://ollama:${OLLAMA_PORT:-11434}
      - STREAMLIT_SERVER_PORT=${STREAMLIT_SERVER_PORT:-8501}
      - STREAMLIT_SERVER_ADDRESS=${STREAMLIT_SERVER_ADDRESS:-0.0.0.0}
    restart: unless-stopped
    volumes:
      - ./app:/app
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:${STREAMLIT_SERVER_PORT:-8501}/_stcore/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Model Setup (model initialization)
  model-setup:
    image: ollama/ollama:latest
    container_name: model-setup
    <<: *env
    depends_on:
      - ollama
    volumes:
      - ollama_data:/root/.ollama
    environment:
      - OLLAMA_HOST=http://ollama:${OLLAMA_PORT:-11434}
      - DEFAULT_MODEL=${DEFAULT_MODEL:-mistral:7b-instruct}
    command: >
      sh -c "
        echo 'Waiting for Ollama to be ready...' &&
        sleep 10 &&
        ollama pull ${DEFAULT_MODEL:-mistral:7b-instruct} &&
        echo 'Model loaded successfully!'
      "
    restart: "no"

volumes:
  ollama_data:
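Note: where the `&env` anchor and a service's own `environment:` block define the same key, the service-level block takes precedence. A quick way to sanity-check the running stack is to hit both health endpoints from the host; a minimal sketch (ports follow the compose defaults above, not the 11436 override in .env.example):

```python
# Hedged sketch: check that both containers answer on their default host ports.
import requests

def check(url, name):
    try:
        r = requests.get(url, timeout=5)
        print(f"{name}: HTTP {r.status_code}")
    except requests.exceptions.RequestException as e:
        print(f"{name}: unreachable ({e})")

check("http://localhost:11434/api/tags", "Ollama")
check("http://localhost:8501/_stcore/health", "Streamlit")
```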
example_training_data.json
ADDED
@@ -0,0 +1,32 @@
[
  {
    "instruction": "Jak zoptymalizować kod Python dla machine learning?",
    "input": "",
    "output": "Kluczowe optymalizacje dla ML w Pythonie:\n\n1. **Vectoryzacja z NumPy/Pandas**:\n - Unikaj pętli Python, używaj operacji wektorowych\n - `df.apply()` zamiast iteracji przez wiersze\n\n2. **Wykorzystanie GPU**:\n - PyTorch/TensorFlow z CUDA\n - CuPy dla operacji NumPy na GPU\n\n3. **Multiprocessing**:\n - `multiprocessing.Pool` dla CPU-intensive tasks\n - `joblib.Parallel` dla sklearn\n\n4. **Memory management**:\n - `del` niepotrzebnych zmiennych\n - `gc.collect()` dla garbage collection\n - Chunking dla dużych datasets\n\n5. **Profiling**:\n - `cProfile` do znajdowania bottlenecków\n - `memory_profiler` dla użycia pamięci"
  },
  {
    "instruction": "Wyjaśnij Docker dla początkujących",
    "input": "chcę zrozumieć podstawy",
    "output": "Docker to narzędzie do konteneryzacji aplikacji. Podstawowe koncepty:\n\n**Co to jest kontener?**\n- Lekka, przenośna \"paczka\" z aplikacją i wszystkimi jej zależnościami\n- Działa identycznie na każdym systemie\n\n**Podstawowe komendy:**\n```bash\n# Uruchom kontener\ndocker run nginx\n\n# Lista działających kontenerów\ndocker ps\n\n# Zbuduj obraz z Dockerfile\ndocker build -t myapp .\n\n# Zatrzymaj kontener\ndocker stop <container_id>\n```\n\n**Dockerfile przykład:**\n```dockerfile\nFROM python:3.11\nCOPY . /app\nWORKDIR /app\nRUN pip install -r requirements.txt\nCMD [\"python\", \"app.py\"]\n```\n\n**Zalety:**\n- Spójność środowiska (dev = prod)\n- Łatwe deployowanie\n- Izolacja aplikacji\n- Skalowalność"
  },
  {
    "instruction": "Jak stworzyć REST API w FastAPI?",
    "input": "podstawowy przykład",
    "output": "Oto kompletny przykład REST API w FastAPI:\n\n```python\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\nfrom typing import List, Optional\n\napp = FastAPI(title=\"My API\", version=\"1.0.0\")\n\n# Model danych\nclass Item(BaseModel):\n id: Optional[int] = None\n name: str\n price: float\n description: Optional[str] = None\n\n# \"Baza danych\"\nitems = []\n\n# Endpoints\n@app.get(\"/\")\ndef read_root():\n return {\"message\": \"Hello World\"}\n\n@app.get(\"/items\", response_model=List[Item])\ndef get_items():\n return items\n\n@app.get(\"/items/{item_id}\", response_model=Item)\ndef get_item(item_id: int):\n item = next((x for x in items if x.id == item_id), None)\n if not item:\n raise HTTPException(status_code=404, detail=\"Item not found\")\n return item\n\n@app.post(\"/items\", response_model=Item)\ndef create_item(item: Item):\n item.id = len(items) + 1\n items.append(item)\n return item\n\n@app.put(\"/items/{item_id}\", response_model=Item)\ndef update_item(item_id: int, updated_item: Item):\n for i, item in enumerate(items):\n if item.id == item_id:\n updated_item.id = item_id\n items[i] = updated_item\n return updated_item\n raise HTTPException(status_code=404, detail=\"Item not found\")\n\n@app.delete(\"/items/{item_id}\")\ndef delete_item(item_id: int):\n global items\n items = [x for x in items if x.id != item_id]\n return {\"message\": \"Item deleted\"}\n\n# Uruchomienie\nif __name__ == \"__main__\":\n import uvicorn\n uvicorn.run(app, host=\"0.0.0.0\", port=8000)\n```\n\n**Uruchomienie:**\n```bash\npip install fastapi uvicorn\npython main.py\n```\n\n**Automatyczna dokumentacja:**\n- Swagger UI: http://localhost:8000/docs\n- ReDoc: http://localhost:8000/redoc"
  },
  {
    "instruction": "Wyjaśnij różnice między AI, ML i Deep Learning",
    "input": "",
    "output": "**Hierarchia pojęć (od najszerszego):**\n\n🧠 **Artificial Intelligence (AI)**\n- Najszersze pojęcie\n- Systemy zdolne do wykonywania zadań wymagających \"inteligencji\"\n- Przykłady: chatboty, systemy eksperckie, algorytmy gier\n\n🤖 **Machine Learning (ML) ⊂ AI**\n- Podzbiór AI\n- Algorytmy uczące się z danych bez jawnego programowania\n- Główne typy:\n - **Supervised**: uczenie z etykietowanymi danymi\n - **Unsupervised**: znajdowanie wzorców w danych\n - **Reinforcement**: uczenie przez nagrody/kary\n\n🧬 **Deep Learning (DL) ⊂ ML**\n- Podzbiór ML\n- Sieci neuronowe z wieloma warstwami (\"głębokie\")\n- Automatyczne wyodrębnianie cech z danych\n- Przykłady: CNN (obrazy), RNN (sekwencje), Transformers (tekst)\n\n**Praktyczne różnice:**\n\n| Aspekt | AI | ML | Deep Learning |\n|--------|----|----|---------------|\n| **Dane** | Reguły+dane | Duże zbiory | Bardzo duże zbiory |\n| **Wydajność** | Zależna od reguł | Dobra | Najlepsza (z danymi) |\n| **Interpretacja** | Jasna | Średnia | Trudna (black box) |\n| **Moc obliczeniowa** | Niska | Średnia | Wysoka |\n\n**Przykłady:**\n- **AI bez ML**: System ekspercki w medycynie (reguły if-then)\n- **ML bez DL**: Drzewo decyzyjne, SVM, regresja\n- **Deep Learning**: GPT, BERT, rozpoznawanie obrazów"
  },
  {
    "instruction": "Jak debugging w Pythonie?",
    "input": "najlepsze praktyki",
    "output": "**Kompletny guide do debuggingu w Pythonie:**\n\n## 1. **Print debugging (podstawa)**\n```python\n# Klasyczne print\nprint(f\"Zmienna x = {x}, typ: {type(x)}\")\n\n# Lepsze print z kontekstem\nprint(f\"DEBUG [funkcja_name:42]: x={x}\")\n\n# Print z formatowaniem JSON\nimport json\nprint(json.dumps(data, indent=2, ensure_ascii=False))\n```\n\n## 2. **Python Debugger (pdb)**\n```python\n# Wstaw breakpoint\nimport pdb; pdb.set_trace()\n\n# Python 3.7+\nbreakpoint() # Equivalent do pdb.set_trace()\n\n# Komendy w pdb:\n# n (next line)\n# s (step into)\n# c (continue)\n# l (list code)\n# p variable_name (print variable)\n# pp variable_name (pretty print)\n# q (quit)\n```\n\n## 3. **Logging (profesjonalne)**\n```python\nimport logging\n\n# Konfiguracja\nlogging.basicConfig(\n level=logging.DEBUG,\n format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'\n)\n\nlogger = logging.getLogger(__name__)\n\n# Użycie\nlogger.debug(\"Szczegółowe info dla debuggingu\")\nlogger.info(\"Ogólne informacje\")\nlogger.warning(\"Ostrzeżenie\")\nlogger.error(\"Błąd\")\nlogger.critical(\"Krytyczny błąd\")\n```\n\n## 4. **IDE Debuggers**\n- **VS Code**: F5 → Run and Debug\n- **PyCharm**: Klik na marginesie → Add Breakpoint\n- **Jupyter**: `%debug` magic command\n\n## 5. **Specialized Tools**\n```python\n# Memory profiling\nfrom memory_profiler import profile\n\n@profile\ndef my_function():\n # kod do profilowania\n pass\n\n# Performance profiling\nimport cProfile\ncProfile.run('my_function()')\n\n# Exception debugging\nimport traceback\ntry:\n risky_code()\nexcept Exception as e:\n traceback.print_exc()\n # lub logger.exception(\"Error occurred\")\n```\n\n## 6. **Best Practices**\n- **Używaj assertions**: `assert x > 0, \"x must be positive\"`\n- **Type hints**: Pomaga IDE w debuggingu\n- **Unit testy**: Wczesne wykrywanie błędów\n- **Linting**: `flake8`, `pylint`, `black`\n- **Exception handling**: Zawsze obsługuj wyjątki\n\n## 7. **Quick Debug Tricks**\n```python\n# Inspect object\nvars(object) # Wszystkie atrybuty\ndir(object) # Metody i atrybuty\n\n# Check if variable exists\nif 'variable' in locals():\n print(variable)\n\n# Pretty print data structures\nfrom pprint import pprint\npprint(complex_dict)\n```"
  },
  {
    "instruction": "Jak zacząć z RTX 3050 i machine learning?",
    "input": "pierwszy projekt",
    "output": "**Kompletny przewodnik ML na RTX 3050:**\n\n## 🎯 **Setup środowiska**\n\n```bash\n# 1. CUDA i cuDNN\n# Pobierz CUDA 11.8 z nvidia.com\n# Sprawdź: nvidia-smi\n\n# 2. Python environment\nconda create -n ml python=3.10\nconda activate ml\n\n# 3. PyTorch z CUDA\npip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n\n# 4. ML libraries\npip install pandas numpy matplotlib scikit-learn jupyter\n\n# Test GPU\npython -c \"import torch; print(f'CUDA: {torch.cuda.is_available()}')\"\n```\n\n## 🚀 **Pierwszy projekt: Klasyfikacja obrazów**\n\n```python\nimport torch\nimport torch.nn as nn\nimport torchvision\nfrom torch.utils.data import DataLoader\nimport matplotlib.pyplot as plt\n\n# 1. Sprawdź GPU\ndevice = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\nprint(f'Using device: {device}')\n\n# 2. Dataset (CIFAR-10)\ntransform = torchvision.transforms.Compose([\n torchvision.transforms.ToTensor(),\n torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n])\n\ntrain_dataset = torchvision.datasets.CIFAR10(\n root='./data', train=True, download=True, transform=transform\n)\ntest_dataset = torchvision.datasets.CIFAR10(\n root='./data', train=False, download=True, transform=transform\n)\n\n# 3. DataLoaders (optymalizowane dla RTX 3050)\ntrain_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)\ntest_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)\n\n# 4. Prosty CNN model\nclass SimpleCNN(nn.Module):\n def __init__(self):\n super(SimpleCNN, self).__init__()\n self.conv1 = nn.Conv2d(3, 32, 3, padding=1)\n self.conv2 = nn.Conv2d(32, 64, 3, padding=1)\n self.pool = nn.MaxPool2d(2, 2)\n self.fc1 = nn.Linear(64 * 8 * 8, 512)\n self.fc2 = nn.Linear(512, 10)\n self.dropout = nn.Dropout(0.5)\n\n def forward(self, x):\n x = self.pool(torch.relu(self.conv1(x)))\n x = self.pool(torch.relu(self.conv2(x)))\n x = x.view(-1, 64 * 8 * 8)\n x = torch.relu(self.fc1(x))\n x = self.dropout(x)\n x = self.fc2(x)\n return x\n\n# 5. Training setup\nmodel = SimpleCNN().to(device)\ncriterion = nn.CrossEntropyLoss()\noptimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n\n# 6. Training loop\nnum_epochs = 5\nfor epoch in range(num_epochs):\n model.train()\n running_loss = 0.0\n \n for i, (images, labels) in enumerate(train_loader):\n images, labels = images.to(device), labels.to(device)\n \n optimizer.zero_grad()\n outputs = model(images)\n loss = criterion(outputs, labels)\n loss.backward()\n optimizer.step()\n \n running_loss += loss.item()\n \n if (i+1) % 100 == 0:\n print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}], Loss: {loss.item():.4f}')\n \n print(f'Epoch [{epoch+1}] Average Loss: {running_loss/len(train_loader):.4f}')\n\n# 7. Evaluation\nmodel.eval()\ncorrect = 0\ntotal = 0\n\nwith torch.no_grad():\n for images, labels in test_loader:\n images, labels = images.to(device), labels.to(device)\n outputs = model(images)\n _, predicted = torch.max(outputs.data, 1)\n total += labels.size(0)\n correct += (predicted == labels).sum().item()\n\nprint(f'Test Accuracy: {100 * correct / total:.2f}%')\n```\n\n## ⚡ **Optymalizacje dla RTX 3050**\n\n```python\n# 1. Mixed Precision Training (oszczędza VRAM)\nfrom torch.cuda.amp import autocast, GradScaler\n\nscaler = GradScaler()\n\n# W training loop:\nwith autocast():\n outputs = model(images)\n loss = criterion(outputs, labels)\n\nscaler.scale(loss).backward()\nscaler.step(optimizer)\nscaler.update()\n\n# 2. Gradient Checkpointing\nmodel.gradient_checkpointing_enable()\n\n# 3. DataLoader optymalizacje\ntrain_loader = DataLoader(\n dataset, \n batch_size=32, # Zmniejsz jeśli brakuje VRAM\n num_workers=2, # 2-4 dla RTX 3050\n pin_memory=True, # Szybszy transfer CPU->GPU\n persistent_workers=True # PyTorch 1.7+\n)\n```\n\n## 📊 **Monitoring GPU**\n\n```python\n# Memory monitoring\ndef print_gpu_memory():\n if torch.cuda.is_available():\n print(f'GPU Memory: {torch.cuda.memory_allocated()/1024**3:.1f}GB / {torch.cuda.max_memory_allocated()/1024**3:.1f}GB')\n torch.cuda.reset_peak_memory_stats()\n\n# Użyj po każdej epoce\nprint_gpu_memory()\n```\n\n## 🎯 **Następne projekty:**\n\n1. **NLP**: Fine-tuning BERT na polskich tekstach\n2. **Computer Vision**: Object detection z YOLO\n3. **Time Series**: Predykcja cen akcji z LSTM\n4. **Generative**: Treninig mini-GAN\n5. **Reinforcement Learning**: Gra w Atari\n\n## 💡 **Tips dla RTX 3050:**\n\n- **Batch size**: Zacznij od 32-64, dostosuj do VRAM\n- **Model size**: Używaj mniejszych modeli (ResNet18 zamiast ResNet152)\n- **Mixed precision**: Zawsze włączaj dla większych modeli\n- **Transfer learning**: Używaj pre-trained modeli zamiast trenować od zera\n- **Gradient accumulation**: Jeśli musisz mały batch size\n\n**RTX 3050 = świetna karta do nauki ML! 8GB VRAM wystarczy na większość projektów edukacyjnych.**"
  }
]
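Note: these records follow the same instruction/input/output schema that create_custom_model.py consumes, so they can be validated and rendered into the Mistral [INST] format before training. A short sketch (the file name matches this repo; the validation rule is an assumption):

```python
# Hedged sketch: load the example records and render them in the Mistral Instruct format.
import json

with open("example_training_data.json", encoding="utf-8") as f:
    records = json.load(f)

for rec in records:
    assert {"instruction", "input", "output"} <= rec.keys(), f"bad record: {rec}"
    user = rec["instruction"] + ("\n" + rec["input"] if rec["input"] else "")
    print(f"<s>[INST] {user} [/INST] {rec['output'][:60]}...</s>")
```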
model_requirements.txt
ADDED
@@ -0,0 +1,19 @@
# Fine-tuning requirements
torch>=2.2.2
transformers>=4.39.0
datasets>=2.18.0
peft>=0.10.0
accelerate>=0.29.0
bitsandbytes>=0.43.0
huggingface_hub>=0.22.0

# For data processing
pandas>=2.2.0
numpy>=1.26.0

# For model publishing
huggingface_hub[cli]>=0.22.0

# Optional: for advanced training
wandb>=0.17.0
tensorboard>=2.16.0
quick-start.sh
ADDED
@@ -0,0 +1,131 @@
#!/bin/bash

# 🚀 Minimal LLM Setup - Everything in one script!

set -e

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${BLUE}🚀 Minimal LLM Setup${NC}"
echo "===================="

# Check Docker
if ! command -v docker &> /dev/null; then
    echo -e "${RED}❌ Docker not found. Please install Docker first.${NC}"
    exit 1
fi

# Check Docker Compose
if ! docker compose version &> /dev/null; then
    echo -e "${RED}❌ Docker Compose not found. Please install Docker Compose.${NC}"
    exit 1
fi

# Check NVIDIA Docker (optional)
if docker run --rm --gpus all nvidia/cuda:11.8-base-ubuntu22.04 nvidia-smi &>/dev/null; then
    echo -e "${GREEN}✅ NVIDIA Docker detected${NC}"
    GPU_AVAILABLE=true
else
    echo -e "${YELLOW}⚠️ No GPU detected, running on CPU${NC}"
    GPU_AVAILABLE=false
fi

# Create project structure
echo -e "${BLUE}📁 Creating project structure...${NC}"
mkdir -p app

# Create minimal Streamlit app if it doesn't exist
if [ ! -f "app/main.py" ]; then
    echo -e "${BLUE}📝 Creating Streamlit app...${NC}"
    # The file content would be copied here in real scenario
    echo "# Streamlit app created. Copy the main.py content here."
fi

# Modify docker-compose for CPU if no GPU
if [ "$GPU_AVAILABLE" = false ]; then
    echo -e "${YELLOW}🔧 Configuring for CPU mode...${NC}"
    sed -i 's/deploy:/# deploy:/g' docker-compose.yml || true
    sed -i 's/resources:/# resources:/g' docker-compose.yml || true
    sed -i 's/reservations:/# reservations:/g' docker-compose.yml || true
    sed -i 's/devices:/# devices:/g' docker-compose.yml || true
    sed -i 's/- driver: nvidia/# - driver: nvidia/g' docker-compose.yml || true
    sed -i 's/count: 1/# count: 1/g' docker-compose.yml || true
    sed -i 's/capabilities: \[gpu\]/# capabilities: [gpu]/g' docker-compose.yml || true
fi

# Build and start services
echo -e "${BLUE}🔨 Building and starting services...${NC}"
docker compose up --build -d

# Wait for services
echo -e "${BLUE}⏳ Waiting for services to start...${NC}"

# Wait for Ollama
echo -n "Waiting for Ollama"
for i in {1..30}; do
    if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
        echo -e "${GREEN} ✅${NC}"
        break
    fi
    echo -n "."
    sleep 2
done

# Wait for Streamlit
echo -n "Waiting for Streamlit"
for i in {1..30}; do
    if curl -s http://localhost:8501/_stcore/health > /dev/null 2>&1; then
        echo -e "${GREEN} ✅${NC}"
        break
    fi
    echo -n "."
    sleep 2
done

# Check if model download completed
echo -e "${BLUE}📥 Checking model download...${NC}"
docker logs model-setup | tail -5

echo
echo -e "${GREEN}🎉 Setup completed!${NC}"
echo "==================="
echo
echo -e "${BLUE}📍 Access points:${NC}"
echo "  • Streamlit UI: http://localhost:8501"
echo "  • Ollama API:   http://localhost:11434"
echo
echo -e "${BLUE}🔍 Useful commands:${NC}"
echo "  • Check logs:    docker compose logs -f"
echo "  • Stop services: docker compose down"
echo "  • Restart:       docker compose restart"
echo "  • Shell access:  docker exec -it ollama-engine bash"
echo
echo -e "${BLUE}🧪 Test API:${NC}"
echo '  curl -X POST http://localhost:11434/api/generate \'
echo '    -H "Content-Type: application/json" \'
echo '    -d '"'"'{"model": "mistral:7b-instruct", "prompt": "Hello!"}'\'

# Auto-open browser (optional)
if command -v xdg-open &> /dev/null; then
    echo
    read -p "Open browser automatically? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        xdg-open http://localhost:8501
    fi
elif command -v open &> /dev/null; then
    echo
    read -p "Open browser automatically? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        open http://localhost:8501
    fi
fi

echo
echo -e "${GREEN}Happy chatting! 🤖${NC}"
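Note: the curl smoke test that quick-start.sh prints can also be run from Python against the same /api/generate endpoint; a minimal sketch (model name and port follow the defaults used above):

```python
# Hedged sketch: the same smoke test as the printed curl command, via requests.
import requests

resp = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "mistral:7b-instruct", "prompt": "Hello!", "stream": False},
    timeout=120,
)
resp.raise_for_status()
print(resp.json().get("response", ""))
```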
requirements.txt
ADDED
@@ -0,0 +1,3 @@
streamlit==1.29.0
requests==2.31.0
ollama==0.1.7
test_converted_model.py
ADDED
@@ -0,0 +1,273 @@
#!/usr/bin/env python3
"""
🧪 Test script for converted GGUF model
Tests both llama.cpp and Ollama integration
"""

import os
import subprocess
import time
import requests
import json
from pathlib import Path


def test_llamacpp_direct():
    """Test model directly with llama.cpp"""
    print("🧪 Testing with llama.cpp directly...")

    model_file = "my_custom_model.gguf"
    if not os.path.exists(model_file):
        print(f"❌ Model file not found: {model_file}")
        return False

    llamacpp_main = "./llama.cpp/main"
    if not os.path.exists(llamacpp_main):
        print(f"❌ llama.cpp main not found: {llamacpp_main}")
        print("Run: ./convert_to_gguf.sh first")
        return False

    test_prompts = [
        "Hello, how are you?",
        "Wyjaśnij co to jest Docker",
        "Napisz prostą funkcję w Pythonie"
    ]

    for i, prompt in enumerate(test_prompts, 1):
        print(f"\n--- Test {i}/3: {prompt[:30]}... ---")

        cmd = [
            llamacpp_main,
            "-m", model_file,
            "-p", prompt,
            "-n", "100",
            "--temp", "0.7",
            "--top-p", "0.9"
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
            if result.returncode == 0:
                print("✅ Response generated successfully")
                print("Response preview:", result.stdout[:200] + "..." if len(result.stdout) > 200 else result.stdout)
            else:
                print(f"❌ Error: {result.stderr}")
                return False
        except subprocess.TimeoutExpired:
            print("⏰ Timeout - model may be too slow")
            return False
        except Exception as e:
            print(f"❌ Exception: {e}")
            return False

    return True


def test_ollama_integration():
    """Test model through Ollama"""
    print("\n🤖 Testing Ollama integration...")

    # Check if Ollama is installed
    try:
        result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
        if result.returncode != 0:
            print("❌ Ollama not installed or not running")
            return False
    except FileNotFoundError:
        print("❌ Ollama command not found")
        return False

    model_name = "my-custom-model"

    # Check if our custom model exists in Ollama
    if model_name not in result.stdout:
        print(f"⚠️ Model '{model_name}' not found in Ollama")
        print("Create it first:")
        print("1. ollama create my-custom-model -f Modelfile")
        return False

    print(f"✅ Found model: {model_name}")

    # Test through Ollama API
    test_prompts = [
        "Cześć! Kim jesteś?",
        "Jak zoptymalizować kod Python?",
        "Co to jest machine learning?"
    ]

    for i, prompt in enumerate(test_prompts, 1):
        print(f"\n--- Ollama Test {i}/3: {prompt[:30]}... ---")

        try:
            # Test via CLI
            cmd = ["ollama", "run", model_name, prompt]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)

            if result.returncode == 0:
                print("✅ Ollama CLI response successful")
                print("Response preview:", result.stdout[:200] + "..." if len(result.stdout) > 200 else result.stdout)
            else:
                print(f"❌ Ollama CLI error: {result.stderr}")
                continue

        except subprocess.TimeoutExpired:
            print("⏰ Ollama timeout")
            continue
        except Exception as e:
            print(f"❌ Ollama exception: {e}")
            continue

    # Test via API
    print("\n🌐 Testing Ollama API...")
    try:
        api_url = "http://localhost:11434/api/generate"
        test_data = {
            "model": model_name,
            "prompt": "Hello! Test API call.",
            "stream": False
        }

        response = requests.post(api_url, json=test_data, timeout=60)
        if response.status_code == 200:
            data = response.json()
            print("✅ Ollama API response successful")
            print("API Response:", data.get('response', 'No response field')[:100])
        else:
            print(f"❌ API Error: {response.status_code}")
            return False

    except requests.exceptions.RequestException as e:
        print(f"❌ API Request failed: {e}")
        return False

    return True


def benchmark_model():
    """Simple benchmark of the model"""
    print("\n📊 Running simple benchmark...")

    model_file = "my_custom_model.gguf"
    if not os.path.exists(model_file):
        print("❌ Model file not found for benchmark")
        return

    # Get file size
    file_size = os.path.getsize(model_file) / (1024 ** 3)  # GB
    print(f"📁 Model size: {file_size:.2f} GB")

    # Benchmark prompt
    benchmark_prompt = "Explain artificial intelligence in simple terms."

    llamacpp_main = "./llama.cpp/main"
    if os.path.exists(llamacpp_main):
        print("⏱️ Timing generation speed...")

        cmd = [
            llamacpp_main,
            "-m", model_file,
            "-p", benchmark_prompt,
            "-n", "100",
            "--temp", "0.7"
        ]

        start_time = time.time()
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
            end_time = time.time()

            if result.returncode == 0:
                duration = end_time - start_time
                # Rough tokens estimation
                tokens = len(result.stdout.split())
                tokens_per_second = tokens / duration if duration > 0 else 0

                print(f"⚡ Generation time: {duration:.2f} seconds")
                print(f"🚀 Speed: ~{tokens_per_second:.1f} tokens/second")
                print(f"📝 Generated tokens: ~{tokens}")
            else:
                print("❌ Benchmark failed")
        except subprocess.TimeoutExpired:
            print("⏰ Benchmark timeout")


def main():
    """Main test runner"""
    print("🧪 Custom Model Test Suite")
    print("=" * 40)

    # Check prerequisites
    print("🔍 Checking prerequisites...")

    required_files = [
        "my_custom_model.gguf",
        "./llama.cpp/main",
        "Modelfile"
    ]

    missing_files = [f for f in required_files if not os.path.exists(f)]

    if missing_files:
        print("❌ Missing required files:")
        for f in missing_files:
            print(f"  • {f}")
        print("\nRun these commands first:")
        print("1. python create_custom_model.py  # fine-tune model")
        print("2. ./convert_to_gguf.sh  # convert to GGUF")
        print("3. ollama create my-custom-model -f Modelfile  # import to Ollama")
        return

    print("✅ All required files found")

    # Run tests
    tests_passed = 0
    total_tests = 3

    # Test 1: Direct llama.cpp
    if test_llamacpp_direct():
        tests_passed += 1
        print("✅ llama.cpp test PASSED")
    else:
        print("❌ llama.cpp test FAILED")

    # Test 2: Ollama integration
    if test_ollama_integration():
        tests_passed += 1
        print("✅ Ollama test PASSED")
    else:
        print("❌ Ollama test FAILED")

    # Test 3: Benchmark
    benchmark_model()
    tests_passed += 1  # Benchmark always "passes"

    # Results
    print("\n" + "=" * 40)
    print(f"🎯 Test Results: {tests_passed}/{total_tests} passed")

    if tests_passed == total_tests:
        print("🎉 All tests passed! Your custom model is ready!")
        print("\n🚀 Next steps:")
        print("• ollama push my-custom-model  # Share with the world")
        print("• Integrate into your applications")
        print("• Fine-tune further with more data")
    else:
        print("⚠️ Some tests failed. Check the output above.")

    # Usage examples
    print("\n📚 Usage Examples:")
    print("# Ollama CLI:")
    print("ollama run my-custom-model 'Your question here'")
    print("\n# Ollama API:")
    print("curl -X POST http://localhost:11434/api/generate \\")
    print("  -H 'Content-Type: application/json' \\")
    print("  -d '{\"model\": \"my-custom-model\", \"prompt\": \"Hello!\"}'")

    print("\n# Python integration:")
    print("import ollama")
    print("response = ollama.chat(model='my-custom-model', messages=[")
    print("    {'role': 'user', 'content': 'Hello!'}])")


if __name__ == "__main__":
    main()
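Note: test_converted_model.py only exercises the non-streaming API (`"stream": False`). When streaming is enabled, the Ollama endpoint returns one JSON object per line; a minimal sketch of consuming that stream (endpoint and field names follow the API already used above, treat it as an illustration):

```python
# Hedged sketch: stream tokens from the Ollama API instead of waiting for the full reply.
import json
import requests

with requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "my-custom-model", "prompt": "Hello!", "stream": True},
    stream=True,
    timeout=120,
) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines():
        if line:
            chunk = json.loads(line)
            print(chunk.get("response", ""), end="", flush=True)
            if chunk.get("done"):
                break
```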