softreck committed
Commit 0fa4cc9 · verified · 1 Parent(s): cc87b41

Upload folder using huggingface_hub
.env.example ADDED
@@ -0,0 +1,33 @@
+ # Ollama Configuration
+ OLLAMA_HOST=0.0.0.0
+ OLLAMA_PORT=11436
+
+ # Streamlit Configuration
+ STREAMLIT_SERVER_PORT=8501
+ STREAMLIT_SERVER_ADDRESS=0.0.0.0
+ STREAMLIT_SERVER_HEADLESS=true
+ STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
+
+ # Application Settings
+ APP_DEBUG=false
+ APP_ENV=development
+ APP_SECRET_KEY=your-secret-key-here
+
+ # Model Configuration
+ DEFAULT_MODEL=mistral:7b-instruct
+ MODEL_TEMPERATURE=0.7
+ MAX_TOKENS=2000
+
+ # API Configuration (if needed)
+ # API_KEY=your-api-key-here
+ # API_BASE_URL=http://localhost:11434
+
+ # Database Configuration (if needed)
+ # DB_HOST=db
+ # DB_PORT=5432
+ # DB_NAME=llm_demo
+ # DB_USER=postgres
+ # DB_PASSWORD=your-db-password
+
+ # CORS Configuration (if needed)
+ # CORS_ORIGINS=http://localhost:3000,http://localhost:8501
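These variables are injected by `docker-compose.yml` via `env_file: .env`; the application itself reads them through `os.getenv`, as `app/main.py` does. A minimal sketch of reading the same settings in Python (no extra dependency assumed; the derived `OLLAMA_URL` variable is illustrative):

```python
# Sketch: read the .env settings from the process environment,
# mirroring the os.getenv pattern used in app/main.py.
import os

OLLAMA_HOST = os.getenv("OLLAMA_HOST", "0.0.0.0")
OLLAMA_PORT = int(os.getenv("OLLAMA_PORT", "11434"))
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "mistral:7b-instruct")
MODEL_TEMPERATURE = float(os.getenv("MODEL_TEMPERATURE", "0.7"))
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "2000"))

# Illustrative: build the Ollama base URL from host and port.
OLLAMA_URL = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}"
print(f"Using {DEFAULT_MODEL} at {OLLAMA_URL}")
```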
.gitignore ADDED
@@ -0,0 +1,197 @@
1
+ .idea
2
+ .env
3
+ venv
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # UV
101
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ #uv.lock
105
+
106
+ # poetry
107
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
108
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
109
+ # commonly ignored for libraries.
110
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
111
+ #poetry.lock
112
+
113
+ # pdm
114
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
115
+ #pdm.lock
116
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
117
+ # in version control.
118
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
119
+ .pdm.toml
120
+ .pdm-python
121
+ .pdm-build/
122
+
123
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
124
+ __pypackages__/
125
+
126
+ # Celery stuff
127
+ celerybeat-schedule
128
+ celerybeat.pid
129
+
130
+ # SageMath parsed files
131
+ *.sage.py
132
+
133
+ # Environments
134
+ .env
135
+ .venv
136
+ env/
137
+ venv/
138
+ ENV/
139
+ env.bak/
140
+ venv.bak/
141
+
142
+ # Spyder project settings
143
+ .spyderproject
144
+ .spyproject
145
+
146
+ # Rope project settings
147
+ .ropeproject
148
+
149
+ # mkdocs documentation
150
+ /site
151
+
152
+ # mypy
153
+ .mypy_cache/
154
+ .dmypy.json
155
+ dmypy.json
156
+
157
+ # Pyre type checker
158
+ .pyre/
159
+
160
+ # pytype static type analyzer
161
+ .pytype/
162
+
163
+ # Cython debug symbols
164
+ cython_debug/
165
+
166
+ # PyCharm
167
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
168
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
169
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
170
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
171
+ #.idea/
172
+
173
+ # Abstra
174
+ # Abstra is an AI-powered process automation framework.
175
+ # Ignore directories containing user credentials, local state, and settings.
176
+ # Learn more at https://abstra.io/docs
177
+ .abstra/
178
+
179
+ # Visual Studio Code
180
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
181
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
182
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
183
+ # you could uncomment the following to ignore the entire vscode folder
184
+ # .vscode/
185
+
186
+ # Ruff stuff:
187
+ .ruff_cache/
188
+
189
+ # PyPI configuration file
190
+ .pypirc
191
+
192
+ # Cursor
193
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
194
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
195
+ # refer to https://docs.cursor.com/context/ignore-files
196
+ .cursorignore
197
+ .cursorindexingignore
Dockerfile ADDED
@@ -0,0 +1,20 @@
+ FROM python:3.11-slim
+
+ # Minimal system dependencies
+ RUN apt-get update && apt-get install -y \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Python dependencies (only 3 packages!)
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Application code
+ WORKDIR /app
+ COPY app/ .
+
+ # Streamlit port
+ EXPOSE 8501
+
+ # Startup command
+ CMD ["streamlit", "run", "main.py", "--server.address", "0.0.0.0", "--server.port", "8501"]
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
Makefile ADDED
@@ -0,0 +1,119 @@
+ .PHONY: help install build up down stop restart logs logs-ollama logs-ui clean test lint format check-env shell-ollama shell-ui open open-ui open-ollama
+
+ # Default target
+ help:
+ @echo "\nLLM Demo - Available commands:\n"
+ @echo " make install Install Python dependencies"
+ @echo " make build Build Docker containers"
+ @echo " make up Start all services in detached mode"
+ @echo " make down Stop and remove all containers, networks, and volumes"
+ @echo " make stop Stop everything and prune images, volumes, and networks"
+ @echo " make restart Restart all services"
+ @echo " make logs Follow container logs"
+ @echo " make logs-ollama Follow Ollama container logs"
+ @echo " make logs-ui Follow Streamlit UI logs"
+ @echo " make clean Alias for 'make stop'"
+ @echo " make test Run tests"
+ @echo " make lint Run linter"
+ @echo " make format Format code"
+ @echo " make shell-ollama Open shell in Ollama container"
+ @echo " make shell-ui Open shell in Streamlit UI container"
+ @echo " make open Open all services in browser"
+ @echo " make open-ui Open Streamlit UI in browser"
+ @echo " make open-ollama Open Ollama API in browser"
+
+ # Check if .env file exists
+ check-env:
+ @if [ ! -f .env ]; then \
+ echo "Error: .env file not found. Please create one from .env.example"; \
+ exit 1; \
+ fi
+
+ # Install Python dependencies
+ install:
+ @echo "Installing Python dependencies..."
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+
+ # Build Docker containers
+ build: check-env
+ @echo "Building Docker containers..."
+ docker-compose build
+
+ # Start all services in detached mode
+ up: check-env
+ @echo "Starting all services..."
+ docker-compose up -d
+
+ # Stop and remove all containers, networks, and volumes
+ down:
+ @echo "Stopping and removing all containers..."
+ docker-compose down -v
+
+ # Stop and remove all containers, networks, and images, then prune unused Docker resources
+ stop:
+ @echo "Stopping and removing all containers, networks, and images..."
+ docker-compose down --rmi all --volumes --remove-orphans
+ @echo "Removing unused Docker resources..."
+ docker system prune -a -f --volumes
+ @echo "Removing all unused Docker networks..."
+ docker network prune -f
+ @echo "Removing all unused Docker volumes..."
+ docker volume prune -f
+
+ # Restart all services
+ restart: stop up
+
+ # Follow container logs
+ logs:
+ docker-compose logs -f
+
+ # Follow Ollama container logs
+ logs-ollama:
+ docker-compose logs -f ollama
+
+ # Follow Streamlit UI logs
+ logs-ui:
+ docker-compose logs -f streamlit-ui
+
+ # Alias for stop (for backward compatibility)
+ clean: stop
+
+ # Run tests
+ test:
+ @echo "Running tests..."
+ # Add your test command here
+ # Example: python -m pytest tests/
+
+ # Lint code
+ lint:
+ @echo "Running linter..."
+ # Add your lint command here
+ # Example: pylint app/
+
+ # Format code
+ format:
+ @echo "Formatting code..."
+ # Add your format command here
+ # Example: black app/
+
+ # Open shell in Ollama container
+ shell-ollama:
+ docker-compose exec ollama /bin/sh
+
+ # Open shell in Streamlit UI container
+ shell-ui:
+ docker-compose exec streamlit-ui /bin/sh
+
+ # Open all services in browser
+ open: open-ui open-ollama
+
+ # Open Streamlit UI in browser
+ open-ui:
+ @echo "Opening Streamlit UI..."
+ @xdg-open http://localhost:8501 2>/dev/null || open http://localhost:8501 2>/dev/null || start http://localhost:8501 2>/dev/null || echo "Could not open the browser. Please open http://localhost:8501 manually"
+
+ # Open Ollama API in browser
+ open-ollama:
+ @echo "Opening Ollama API..."
+ @xdg-open http://localhost:11436 2>/dev/null || open http://localhost:11436 2>/dev/null || start http://localhost:11436 2>/dev/null || echo "Could not open the browser. Please open http://localhost:11436 manually"
Modelfile ADDED
@@ -0,0 +1,21 @@
+ FROM ./my_custom_model.gguf
+
+ # Model parameters
+ PARAMETER temperature 0.7
+ PARAMETER top_p 0.9
+ PARAMETER top_k 40
+ PARAMETER num_ctx 2048
+
+ # System prompt (triple quotes for a multi-line value)
+ SYSTEM """Jesteś pomocnym asystentem AI stworzonym specjalnie dla polskich użytkowników.
+ Odpowiadasz w języku polskim, jesteś precyzyjny i pomocny.
+ Specjalizujesz się w programowaniu, technologii i sztucznej inteligencji."""
+
+ # Chat template for Mistral Instruct
+ TEMPLATE "<s>[INST] {{ if .System }}{{ .System }}{{ end }}{{ .Prompt }} [/INST] {{ .Response }}</s>"
+
+ # Generation limits and stop tokens
+ PARAMETER num_predict 256
+ PARAMETER stop "<s>"
+ PARAMETER stop "[INST]"
+ PARAMETER stop "[/INST]"
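For readers unfamiliar with Go template syntax, the TEMPLATE line above serializes a single exchange roughly as shown in this Python sketch (illustrative only; Ollama performs the real rendering):

```python
# Rough Python mirror of the Modelfile TEMPLATE above.
def render(system: str, prompt: str, response: str = "") -> str:
    # The system prompt (if any) and the user prompt share one [INST] ... [/INST] span.
    system_part = system if system else ""
    return f"<s>[INST] {system_part}{prompt} [/INST] {response}</s>"

print(render("Jesteś pomocnym asystentem AI. ", "Kim jesteś?"))
```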
Modelfile.template ADDED
@@ -0,0 +1,41 @@
+ # Modelfile for custom fine-tuned Mistral model
+ # Generated by convert_to_gguf.sh
+
+ FROM ./my_custom_model.gguf
+
+ # Model parameters optimized for RTX 3050
+ PARAMETER temperature 0.7
+ PARAMETER top_p 0.9
+ PARAMETER top_k 40
+ PARAMETER num_ctx 2048
+ PARAMETER num_predict 512
+ PARAMETER repeat_penalty 1.1
+ PARAMETER repeat_last_n 64
+
+ # Stop tokens for Mistral format
+ PARAMETER stop "<s>"
+ PARAMETER stop "[INST]"
+ PARAMETER stop "[/INST]"
+ PARAMETER stop "</s>"
+
+ # System prompt - customize this for your use case
+ SYSTEM """Jesteś pomocnym asystentem AI stworzonym przez fine-tuning modelu Mistral 7B.
+
+ Twoje specjalizacje:
+ - Programowanie w Pythonie
+ - Machine Learning i AI
+ - Docker i DevOps
+ - Wyjaśnianie technicznych konceptów
+
+ Odpowiadasz w języku polskim, jesteś precyzyjny i podajesz praktyczne przykłady.
+ Gdy nie znasz odpowiedzi, uczciwie to przyznajesz.
+ """
+
+ # Chat template for Mistral Instruct format
+ TEMPLATE """<s>{{- if .System }}[INST] {{ .System }}
+
+ {{ .Prompt }} [/INST]{{ else }}[INST] {{ .Prompt }} [/INST]{{ end }} {{ .Response }}</s>"""
+
+ # Runtime settings
+ PARAMETER num_thread 4
+ PARAMETER num_gpu_layers 20
README.md CHANGED
@@ -1,3 +1,388 @@
- ---
- license: apache-2.0
- ---
+ # 🚀 Minimal LLM + Custom Model - Complete Guide
+
+ ## 🎯 **PART 1: Up and running in 2 minutes**
+
+ ### Quick start (the minimal solution)
+ ```bash
+ # 1. Clone the files
+ git clone <your-repo>
+ cd minimal-llm
+
+ # 2. Start everything with one command
+ chmod +x quick-start.sh
+ ./quick-start.sh
+
+ # 3. Open your browser
+ # http://localhost:8501 - Streamlit UI
+ # http://localhost:11434 - Ollama API
+ ```
+
+ ### What happens under the hood?
+ - **Ollama** - pulls and serves Mistral 7B
+ - **Streamlit** - a simple chat interface
+ - **Docker** - everything runs in containers
+ - **Minimal dependencies** - only 3 Python packages!
+
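Once the stack is up you can also talk to the Ollama API directly, without the UI. A minimal sketch using the same `ollama` Python client that `app/main.py` uses (it assumes the default port 11434 and that the `mistral:7b-instruct` model has finished downloading):

```python
# Quick smoke test against the running Ollama container.
import ollama

client = ollama.Client(host="http://localhost:11434")
reply = client.chat(
    model="mistral:7b-instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(reply["message"]["content"])
```
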
+ ## 📁 **Project structure (minimal)**
+ ```
+ minimal-llm/
+ ├── docker-compose.yml   # 1 file - the whole infrastructure
+ ├── Dockerfile           # minimal image
+ ├── requirements.txt     # 3 packages
+ ├── quick-start.sh       # 1 command = full setup
+ └── app/
+     └── main.py          # 50 lines = a complete chat
+ ```
+
+ ## 🎯 **PART 2: Create your own LLM**
+
+ ### Step 1: Prepare the environment
+ ```bash
+ # Install the fine-tuning dependencies
+ pip install -r model_requirements.txt
+
+ # Log in to Hugging Face (needed for publishing)
+ huggingface-cli login
+ ```
+
+ ### Step 2: Prepare the data
+ ```bash
+ python create_custom_model.py
+ # Choose option 1: Create a sample dataset
+ ```
+
+ Example training data:
+ ```json
+ [
+   {
+     "instruction": "Jak nazywa się stolica Polski?",
+     "input": "",
+     "output": "Stolica Polski to Warszawa."
+   },
+   {
+     "instruction": "Wyjaśnij czym jest sztuczna inteligencja",
+     "input": "",
+     "output": "Sztuczna inteligencja (AI) to dziedzina informatyki..."
+   }
+ ]
+ ```
+
+ ### Step 3: Fine-tune the model
+ ```bash
+ # Run fine-tuning (requires a GPU)
+ python create_custom_model.py
+ # Choose option 2: Fine-tune model
+
+ # Or run the full pipeline
+ python create_custom_model.py
+ # Choose option 6: Full pipeline
+ ```
+
+ **Optimizations for an RTX 3050:**
+ - 4-bit quantization
+ - LoRA (Low-Rank Adaptation)
+ - Batch size = 1
+ - Gradient accumulation = 4
+ - Mixed precision (FP16)
+
+ ### Step 4: Convert to GGUF
+ ```bash
+ # Automatically generated script
+ ./convert_to_gguf.sh
+ ```
+
+ ### Step 5: Create the model in Ollama
+ ```bash
+ # Create the Modelfile
+ python create_custom_model.py  # choose option 4
+
+ # Create the model in Ollama
+ ollama create wronai -f Modelfile
+
+ # Run the model
+ ollama run wronai
+ ```
+
+ ### Running the script
+ The `create_custom_model.py` script provides an interactive menu with the following options:
+
+ ```bash
+ python create_custom_model.py
+ ```
+
+ Available options:
+ 1. Create a sample dataset
+ 2. Fine-tune the model
+ 3. Convert the model to GGUF format
+ 4. Create a Modelfile for Ollama
+ 5. Publish the model to Hugging Face
+ 6. Run the full pipeline (1-5)
+
+ ### Prerequisites
+ - Python 3.8+
+ - PyTorch with CUDA support (recommended)
+ - The libraries listed in `model_requirements.txt`
+ - A [Hugging Face](https://huggingface.co/) account (for publishing the model)
+
+ ### Troubleshooting
+
+ #### Syntax errors in the script
+ If you hit a syntax error, make sure that:
+ 1. You are using Python 3.8 or newer
+ 2. All dependencies are installed
+ 3. The file was not corrupted during download
+
+ #### Dependency problems
+ ```bash
+ # Create and activate a virtual environment
+ python -m venv .venv
+ source .venv/bin/activate  # Linux/Mac
+ .venv\Scripts\activate     # Windows
+
+ # Install the dependencies
+ pip install -r model_requirements.txt
+ ```
+
+ #### Missing permissions
+ If you run into permission problems, try:
+ ```bash
+ # Make the shell scripts executable
+ chmod +x *.sh
+
+ # Run with administrator privileges (if needed)
+ sudo python create_custom_model.py
+ ```
+
+ ### Contributing
+ Issues and pull requests are welcome.
+
+ Quick test of the model:
+ ```bash
+ # Test the model
+ ollama run wronai "Cześć! Kim jesteś?"
+ ```
+
+ ### Step 6: Publish the model
+
+ #### **Option A: Ollama Registry**
+ ```bash
+ # Push to the Ollama Library
+ ollama push wronai
+
+ # Now anyone can use it:
+ ollama pull your-username/wronai
+ ```
+
+ #### **Option B: Hugging Face Hub**
+ ```bash
+ # Publish to HF
+ python publish_to_hf.py
+
+ # The model is then available at:
+ # https://huggingface.co/your-username/my-custom-mistral-7b
+ ```
+
+ #### **Option C: Docker Registry**
+ ```bash
+ # Package it as a Docker image
+ docker build -t my-custom-llm .
+ docker tag my-custom-llm your-registry/my-custom-llm
+ docker push your-registry/my-custom-llm
+ ```
+
+ ## 🎯 **PART 3: Ready-made alternatives (zero code)**
+
+ ### **1. Simplest - Ollama**
+ ```bash
+ # Install
+ curl -fsSL https://ollama.ai/install.sh | sh
+
+ # Run a model
+ ollama run mistral:7b-instruct
+
+ # The API is automatically available on localhost:11434
+ ```
+
+ ### **2. Hugging Face Inference API**
+ ```python
+ import requests
+
+ headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}
+ response = requests.post(
+     "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1",
+     headers=headers,
+     json={"inputs": "Hello!"}
+ )
+ ```
+
+ ### **3. Groq (ultra fast)**
+ ```python
+ from openai import OpenAI
+
+ client = OpenAI(
+     api_key="YOUR_GROQ_KEY",
+     base_url="https://api.groq.com/openai/v1"
+ )
+
+ response = client.chat.completions.create(
+     model="mistral-7b-instruct",
+     messages=[{"role": "user", "content": "Hello!"}]
+ )
+ ```
+
+ ### **4. Together.ai**
+ ```python
+ from openai import OpenAI
+
+ client = OpenAI(
+     api_key="YOUR_TOGETHER_KEY",
+     base_url="https://api.together.xyz/v1"
+ )
+
+ # Compatible with the OpenAI API
+ ```
+
+ ### **5. Modal.com (serverless GPU)**
+ ```python
+ import modal
+
+ stub = modal.Stub("llm-api")
+
+ @stub.function(gpu="T4")
+ def generate(prompt: str):
+     # Your model code
+     return model.generate(prompt)
+
+ # Deploy with one command
+ # modal deploy
+ ```
+
+ ## 🎯 **PART 4: Frontend options**
+
+ ### **1. Streamlit (Python)**
+ ```python
+ import streamlit as st
+
+ st.title("My LLM Chat")
+ prompt = st.text_input("Message:")
+ if st.button("Send"):
+     response = generate(prompt)
+     st.write(response)
+ ```
+
+ ### **2. Gradio (Python)**
+ ```python
+ import gradio as gr
+
+ def chat(message, history):
+     response = generate(message)
+     history.append([message, response])
+     return "", history
+
+ gr.ChatInterface(chat).launch()
+ ```
+
+ ### **3. Next.js + Vercel AI SDK**
+ ```tsx
+ import { useChat } from 'ai/react'
+
+ export default function Chat() {
+   const { messages, input, handleInputChange, handleSubmit } = useChat()
+
+   return (
+     <div>
+       {messages.map(m => <div key={m.id}>{m.content}</div>)}
+       <form onSubmit={handleSubmit}>
+         <input value={input} onChange={handleInputChange} />
+       </form>
+     </div>
+   )
+ }
+ ```
+
+ ## 🎯 **PART 5: Comparing the options**
+
+ | Solution | Setup Time | Code | Hosting | GPU |
+ |-------------|------------|-----|---------|-----|
+ | **Ollama + Streamlit** | 2 min | 50 lines | Local/Docker | Optional |
+ | **Hugging Face API** | 30 sec | 5 lines | Cloud | No |
+ | **Groq API** | 1 min | 5 lines | Cloud | No |
+ | **Modal.com** | 5 min | 20 lines | Serverless | Auto |
+ | **Custom Fine-tuning** | 2 hours | 200 lines | Self-hosted | Required |
+
+ ## 🛠️ **Debugging & Tips**
+
+ ### Common problems
+ ```bash
+ # The model does not load
+ docker logs ollama-engine
+
+ # No GPU detected
+ docker run --rm --gpus all nvidia/cuda:11.8-base nvidia-smi
+
+ # Port already in use
+ sudo netstat -tlnp | grep 11434
+
+ # Restart everything
+ docker compose down && docker compose up -d
+ ```
+
+ ### RTX 3050 optimizations
+ ```python
+ # During fine-tuning
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     load_in_4bit=True,          # 4-bit quantization
+     torch_dtype=torch.float16   # Half precision
+ )
+
+ # Training args
+ TrainingArguments(
+     per_device_train_batch_size=1,   # Small batch
+     gradient_accumulation_steps=4,   # Gradient accumulation
+     fp16=True                        # Mixed precision
+ )
+ ```
+
+ ### Resource monitoring
+ ```bash
+ # GPU monitoring
+ watch -n 1 nvidia-smi
+
+ # Container resources
+ docker stats
+
+ # Model memory usage
+ docker exec -it ollama-engine ollama ps
+ ```
+
+ ## 🎯 **Next steps**
+
+ ### For learning:
+ 1. **Experiment with different models** - Llama, CodeLlama, Phi-3
+ 2. **Try different fine-tuning techniques** - LoRA, QLoRA, full fine-tuning
+ 3. **Add RAG** - Retrieval Augmented Generation (see the sketch after this list)
+ 4. **Build a multi-agent system**
+
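A minimal sketch of the RAG idea on top of this stack: retrieve a few relevant snippets, prepend them to the prompt, and let the local model answer. The retrieval step here is a naive keyword match over an in-memory list; a real setup would use embeddings and a vector store, and all names below are illustrative.

```python
# Naive RAG sketch: keyword retrieval + prompt stuffing with the local Ollama model.
import ollama

client = ollama.Client(host="http://localhost:11434")

# Stand-in "knowledge base"; in practice: chunked documents + embeddings.
documents = [
    "Ollama serves local LLMs over an HTTP API on port 11434.",
    "Streamlit renders the chat UI on port 8501.",
    "The stack is started with docker compose up -d.",
]

def retrieve(question, k=2):
    # Toy scoring: count words shared between the question and each document.
    words = set(question.lower().split())
    scored = sorted(documents, key=lambda d: -len(words & set(d.lower().split())))
    return scored[:k]

def answer(question):
    context = "\n".join(retrieve(question))
    prompt = f"Answer using only this context:\n{context}\n\nQuestion: {question}"
    reply = client.chat(model="mistral:7b-instruct",
                        messages=[{"role": "user", "content": prompt}])
    return reply["message"]["content"]

print(answer("Which port does the chat UI use?"))
```
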
+ ### For production:
+ 1. **Move to a managed service** - Groq, Together.ai
+ 2. **Set up monitoring** - LangSmith, Weights & Biases
+ 3. **Add a cache** - Redis for responses
+ 4. **Implement rate limiting**
+
+ ### For business:
+ 1. **Fine-tune on your own data**
+ 2. **Set up A/B testing** of different models
+ 3. **Add a feedback loop** from users
+ 4. **Monetize the API**
+
+ ## 🎉 **Summary**
+
+ **Pick an option based on your needs:**
+
+ - **Demo/learning**: Ollama + Streamlit (this solution)
+ - **Prototype**: Hugging Face API + Gradio
+ - **MVP**: Groq API + Next.js
+ - **Production**: Modal/RunPod + custom frontend
+ - **Enterprise**: fine-tuned model + your own infrastructure
+
+ **The minimal solution = 5 files, 50 lines of code, 2 minutes of setup!**
+
app/main.py ADDED
@@ -0,0 +1,124 @@
+ #!/usr/bin/env python3
+ """
+ A minimal LLM app in ~50 lines.
+ Streamlit + Ollama = zero configuration.
+ """
+
+ import streamlit as st
+ import ollama
+ import os
+ from typing import Generator
+
+ # Configuration
+ OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
+ MODEL_NAME = "mistral:7b-instruct"
+
+ # Set up the Ollama client
+ client = ollama.Client(host=OLLAMA_URL)
+
+ def stream_response(prompt: str) -> Generator[str, None, None]:
+     """Generator yielding the streamed model response."""
+     try:
+         stream = client.chat(
+             model=MODEL_NAME,
+             messages=[{"role": "user", "content": prompt}],
+             stream=True
+         )
+
+         for chunk in stream:
+             if chunk['message']['content']:
+                 yield chunk['message']['content']
+     except Exception as e:
+         yield f"Error: {str(e)}"
+
+ def main():
+     # UI setup
+     st.set_page_config(
+         page_title="🤖 Minimal LLM Chat",
+         page_icon="🤖",
+         layout="wide"
+     )
+
+     st.title("🤖 Minimal LLM Chat")
+     st.markdown("*Powered by Ollama + Mistral 7B*")
+
+     # Sidebar with settings
+     with st.sidebar:
+         st.header("⚙️ Settings")
+
+         # Model info
+         try:
+             models = client.list()
+             available_models = [m['name'] for m in models['models']]
+             st.success(f"✅ Connected to Ollama")
+             st.info(f"Available models: {len(available_models)}")
+         except Exception:
+             st.error("❌ Cannot connect to Ollama")
+             st.stop()
+
+         # Parameters (currently display-only; not wired into stream_response)
+         temperature = st.slider("Temperature", 0.0, 2.0, 0.7, 0.1)
+         max_tokens = st.slider("Max tokens", 50, 1000, 500, 50)
+
+         # System prompt
+         system_prompt = st.text_area(
+             "System prompt:",
+             "You are a helpful AI assistant. Answer concisely and accurately.",
+             height=100
+         )
+
+     # Chat interface
+     if "messages" not in st.session_state:
+         st.session_state.messages = []
+
+     # Display chat history
+     for message in st.session_state.messages:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     # Chat input
+     if prompt := st.chat_input("Type your message here..."):
+         # Add user message
+         st.session_state.messages.append({"role": "user", "content": prompt})
+         with st.chat_message("user"):
+             st.markdown(prompt)
+
+         # Generate response
+         with st.chat_message("assistant"):
+             response_placeholder = st.empty()
+             full_response = ""
+
+             # Streaming response
+             for chunk in stream_response(prompt):
+                 full_response += chunk
+                 response_placeholder.markdown(full_response + "▌")
+
+             response_placeholder.markdown(full_response)
+
+         # Add assistant message
+         st.session_state.messages.append({"role": "assistant", "content": full_response})
+
+     # Quick actions
+     col1, col2, col3 = st.columns(3)
+
+     with col1:
+         if st.button("🗑️ Clear Chat"):
+             st.session_state.messages = []
+             st.rerun()
+
+     with col2:
+         if st.button("💡 Example Question"):
+             example = "Explain quantum computing in simple terms"
+             st.session_state.messages.append({"role": "user", "content": example})
+             st.rerun()
+
+     with col3:
+         if st.button("📊 Model Info"):
+             try:
+                 info = client.show(MODEL_NAME)
+                 st.json(info)
+             except Exception:
+                 st.error("Cannot get model info")
+
+ if __name__ == "__main__":
+     main()
convert_to_gguf.sh ADDED
@@ -0,0 +1,164 @@
1
+ #!/bin/bash
2
+
3
+ # 🔄 Convert fine-tuned model to GGUF format for Ollama
4
+ # This script converts your custom fine-tuned model to GGUF format
5
+
6
+ set -e
7
+
8
+ # Colors for output
9
+ RED='\033[0;31m'
10
+ GREEN='\033[0;32m'
11
+ YELLOW='\033[1;33m'
12
+ BLUE='\033[0;34m'
13
+ NC='\033[0m' # No Color
14
+
15
+ echo -e "${BLUE}🔄 Converting Model to GGUF Format${NC}"
16
+ echo "====================================="
17
+
18
+ # Configuration
19
+ MODEL_DIR="./fine_tuned_model"
20
+ OUTPUT_FILE="my_custom_model.gguf"
21
+ LLAMA_CPP_DIR="./llama.cpp"
22
+
23
+ # Check if fine-tuned model exists
24
+ if [ ! -d "$MODEL_DIR" ]; then
25
+ echo -e "${RED}❌ Fine-tuned model not found at: $MODEL_DIR${NC}"
26
+ echo "Run fine-tuning first: python create_custom_model.py (option 2)"
27
+ exit 1
28
+ fi
29
+
30
+ echo -e "${GREEN}✅ Found fine-tuned model at: $MODEL_DIR${NC}"
31
+
32
+ # Check if llama.cpp exists, if not clone it
33
+ if [ ! -d "$LLAMA_CPP_DIR" ]; then
34
+ echo -e "${YELLOW}📥 Cloning llama.cpp...${NC}"
35
+ git clone https://github.com/ggerganov/llama.cpp.git
36
+
37
+ echo -e "${YELLOW}🔨 Building llama.cpp...${NC}"
38
+ cd llama.cpp
39
+
40
+ # Build with CUDA support if available
41
+ if command -v nvcc &> /dev/null; then
42
+ echo -e "${GREEN}🚀 NVIDIA CUDA detected, building with GPU support${NC}"
43
+ make LLAMA_CUBLAS=1 -j$(nproc)
44
+ else
45
+ echo -e "${YELLOW}⚠️ No CUDA detected, building CPU-only version${NC}"
46
+ make -j$(nproc)
47
+ fi
48
+
49
+ cd ..
50
+ else
51
+ echo -e "${GREEN}✅ llama.cpp already exists${NC}"
52
+ fi
53
+
54
+ # Check required Python dependencies
55
+ echo -e "${BLUE}📦 Checking Python dependencies...${NC}"
56
+ python3 -c "import torch, transformers, sentencepiece" 2>/dev/null || {
57
+ echo -e "${YELLOW}⚠️ Installing missing dependencies...${NC}"
58
+ pip install torch transformers sentencepiece protobuf
59
+ }
60
+
61
+ # Convert model to GGUF
62
+ echo -e "${BLUE}🔄 Converting to GGUF format...${NC}"
63
+ echo "This may take several minutes..."
64
+
65
+ # Method 1: Direct conversion (recommended)
66
+ if [ -f "$LLAMA_CPP_DIR/convert.py" ]; then
67
+ echo -e "${GREEN}Using convert.py${NC}"
68
+ python3 "$LLAMA_CPP_DIR/convert.py" \
69
+ "$MODEL_DIR" \
70
+ --outtype f16 \
71
+ --outfile "$OUTPUT_FILE"
72
+ else
73
+ # Method 2: Convert via HF format (fallback)
74
+ echo -e "${YELLOW}Using alternative conversion method${NC}"
75
+ python3 -c "
76
+ import torch
77
+ from transformers import AutoModelForCausalLM, AutoTokenizer
78
+ import sys
79
+ import os
80
+
81
+ print('Loading model...')
82
+ model = AutoModelForCausalLM.from_pretrained('$MODEL_DIR', torch_dtype=torch.float16)
83
+ tokenizer = AutoTokenizer.from_pretrained('$MODEL_DIR')
84
+
85
+ print('Saving in HF format...')
86
+ model.save_pretrained('./temp_hf_model', safe_serialization=True)
87
+ tokenizer.save_pretrained('./temp_hf_model')
88
+ print('Conversion to HF format complete')
89
+ "
90
+
91
+ # Then convert HF to GGUF
92
+ if [ -d "./temp_hf_model" ]; then
93
+ python3 "$LLAMA_CPP_DIR/convert.py" \
94
+ "./temp_hf_model" \
95
+ --outtype f16 \
96
+ --outfile "$OUTPUT_FILE"
97
+ rm -rf ./temp_hf_model
98
+ fi
99
+ fi
100
+
101
+ # Verify conversion
102
+ if [ -f "$OUTPUT_FILE" ]; then
103
+ FILE_SIZE=$(du -h "$OUTPUT_FILE" | cut -f1)
104
+ echo
105
+ echo -e "${GREEN}🎉 Conversion successful!${NC}"
106
+ echo -e "${BLUE}📄 Output file: $OUTPUT_FILE${NC}"
107
+ echo -e "${BLUE}📊 File size: $FILE_SIZE${NC}"
108
+
109
+ # Optional: Quantize to smaller sizes
110
+ echo
111
+ echo -e "${YELLOW}💡 Optional: Create quantized versions?${NC}"
112
+ read -p "Create Q4_K_M quantized version? (y/N): " -n 1 -r
113
+ echo
114
+ if [[ $REPLY =~ ^[Yy]$ ]]; then
115
+ echo -e "${BLUE}🔄 Creating Q4_K_M quantized version...${NC}"
116
+ "$LLAMA_CPP_DIR/quantize" "$OUTPUT_FILE" "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" Q4_K_M
117
+
118
+ if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then
119
+ QUANT_SIZE=$(du -h "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" | cut -f1)
120
+ echo -e "${GREEN}✅ Quantized version created: ${OUTPUT_FILE%.gguf}_q4_k_m.gguf ($QUANT_SIZE)${NC}"
121
+ fi
122
+ fi
123
+
124
+ # Test the converted model
125
+ echo
126
+ echo -e "${YELLOW}🧪 Test the converted model?${NC}"
127
+ read -p "Run a quick test? (y/N): " -n 1 -r
128
+ echo
129
+ if [[ $REPLY =~ ^[Yy]$ ]]; then
130
+ echo -e "${BLUE}🧪 Testing model...${NC}"
131
+ echo "Prompt: 'Hello, how are you?'"
132
+ echo "Response:"
133
+ "$LLAMA_CPP_DIR/main" -m "$OUTPUT_FILE" -p "Hello, how are you?" -n 50 --temp 0.7
134
+ fi
135
+
136
+ else
137
+ echo -e "${RED}❌ Conversion failed!${NC}"
138
+ echo "Check the error messages above."
139
+ exit 1
140
+ fi
141
+
142
+ # Instructions for next steps
143
+ echo
144
+ echo -e "${GREEN}🎯 Next Steps:${NC}"
145
+ echo "1. Create Ollama Modelfile:"
146
+ echo " python create_custom_model.py # option 4"
147
+ echo
148
+ echo "2. Import to Ollama:"
149
+ echo " ollama create my-custom-model -f Modelfile"
150
+ echo
151
+ echo "3. Test in Ollama:"
152
+ echo " ollama run my-custom-model \"Hello!\""
153
+ echo
154
+ echo "4. Push to Ollama Library:"
155
+ echo " ollama push my-custom-model"
156
+ echo
157
+ echo -e "${BLUE}📚 Files created:${NC}"
158
+ echo " • $OUTPUT_FILE (F16 version)"
159
+ if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then
160
+ echo " • ${OUTPUT_FILE%.gguf}_q4_k_m.gguf (Quantized version)"
161
+ fi
162
+
163
+ echo
164
+ echo -e "${GREEN}🎉 GGUF conversion completed successfully!${NC}"
create_custom_model.py ADDED
@@ -0,0 +1,373 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ 🎯 Tworzenie własnego modelu LLM - od fine-tuningu do publikacji
4
+ Przykład z Mistral 7B + własne dane
5
+ """
6
+
7
+ import os
8
+ import json
9
+ from pathlib import Path
10
+ from datasets import Dataset
11
+ from transformers import (
12
+ AutoModelForCausalLM,
13
+ AutoTokenizer,
14
+ TrainingArguments,
15
+ Trainer
16
+ )
17
+ from peft import LoraConfig, get_peft_model, TaskType
18
+ import torch
19
+
20
+ # === 1. PRZYGOTOWANIE DANYCH ===
21
+
22
+ def create_sample_dataset():
23
+ """Tworzy przykładowy dataset do fine-tuningu"""
24
+
25
+ # Przykładowe dane - zamień na swoje!
26
+ sample_data = [
27
+ {
28
+ "instruction": "Jak nazywa się stolica Polski?",
29
+ "input": "",
30
+ "output": "Stolica Polski to Warszawa."
31
+ },
32
+ {
33
+ "instruction": "Wyjaśnij czym jest sztuczna inteligencja",
34
+ "input": "",
35
+ "output": "Sztuczna inteligencja (AI) to dziedzina informatyki zajmująca się tworzeniem systemów zdolnych do wykonywania zadań wymagających inteligencji."
36
+ },
37
+ {
38
+ "instruction": "Napisz krótką funkcję w Pythonie",
39
+ "input": "funkcja do obliczania silni",
40
+ "output": "```python\ndef factorial(n):\n if n <= 1:\n return 1\n return n * factorial(n-1)\n```"
41
+ },
42
+ {
43
+ "instruction": "Jak zoptymalizować kod Python?",
44
+ "input": "",
45
+ "output": "Główne sposoby optymalizacji kodu Python:\n1. Używaj wbudowanych funkcji\n2. Unikaj pętli, preferuj list comprehensions\n3. Używaj numpy dla operacji numerycznych\n4. Profiluj kod przed optymalizacją"
46
+ },
47
+ {
48
+ "instruction": "Co to jest Docker?",
49
+ "input": "",
50
+ "output": "Docker to platforma konteneryzacji umożliwiająca pakowanie aplikacji wraz z zależnościami w lekkie, przenośne kontenery."
51
+ }
52
+ ]
53
+
54
+ # Zapisz dataset
55
+ os.makedirs("data", exist_ok=True)
56
+ with open("data/training_data.json", "w", encoding="utf-8") as f:
57
+ json.dump(sample_data, f, indent=2, ensure_ascii=False)
58
+
59
+ print("✅ Sample dataset created in data/training_data.json")
60
+ return sample_data
61
+
62
+ def format_training_data(examples):
63
+ """Formatuje dane dla Mistral Instruct"""
64
+ formatted_texts = []
65
+
66
+ for example in examples:
67
+ if example.get("input"):
68
+ prompt = f"<s>[INST] {example['instruction']}\n{example['input']} [/INST] {example['output']}</s>"
69
+ else:
70
+ prompt = f"<s>[INST] {example['instruction']} [/INST] {example['output']}</s>"
71
+ formatted_texts.append(prompt)
72
+
73
+ return {"text": formatted_texts}
74
+
75
+ # === 2. FINE-TUNING Z LORA ===
76
+
77
+ def setup_model_and_tokenizer(model_name="mistralai/Mistral-7B-Instruct-v0.1"):
78
+ """Ładuje model i tokenizer"""
79
+ print(f"📥 Loading model: {model_name}")
80
+
81
+ # Tokenizer
82
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
83
+ tokenizer.pad_token = tokenizer.eos_token
84
+ tokenizer.padding_side = "right"
85
+
86
+ # Model z quantization dla RTX 3050
87
+ model = AutoModelForCausalLM.from_pretrained(
88
+ model_name,
89
+ torch_dtype=torch.float16,
90
+ device_map="auto",
91
+ load_in_4bit=True, # 4-bit quantization
92
+ trust_remote_code=True
93
+ )
94
+
95
+ return model, tokenizer
96
+
97
+ def setup_lora_config():
98
+ """Konfiguracja LoRA dla efficient fine-tuning"""
99
+ return LoraConfig(
100
+ task_type=TaskType.CAUSAL_LM,
101
+ inference_mode=False,
102
+ r=16, # LoRA rank
103
+ lora_alpha=32,
104
+ lora_dropout=0.1,
105
+ target_modules=["q_proj", "k_proj", "v_proj", "o_proj"] # Mistral attention modules
106
+ )
107
+
108
+ def fine_tune_model():
109
+ """Główna funkcja fine-tuningu"""
110
+
111
+ # 1. Przygotuj dane
112
+ print("🔄 Preparing training data...")
113
+ sample_data = create_sample_dataset()
114
+
115
+ # 2. Ładuj model
116
+ model, tokenizer = setup_model_and_tokenizer()
117
+
118
+ # 3. Setup LoRA
119
+ lora_config = setup_lora_config()
120
+ model = get_peft_model(model, lora_config)
121
+
122
+ print(f"📊 Trainable parameters: {model.print_trainable_parameters()}")
123
+
124
+ # 4. Przygotuj dataset
125
+ dataset = Dataset.from_list(sample_data)
126
+ formatted_dataset = dataset.map(
127
+ lambda x: format_training_data([x]),
128
+ remove_columns=dataset.column_names
129
+ )
130
+
131
+ # Tokenizacja
132
+ def tokenize_function(examples):
133
+ return tokenizer(
134
+ examples["text"],
135
+ truncation=True,
136
+ padding="max_length",
137
+ max_length=512,
138
+ return_tensors="pt"
139
+ )
140
+
141
+ tokenized_dataset = formatted_dataset.map(tokenize_function, batched=True)
142
+
143
+ # 5. Training arguments - optymalizowane dla RTX 3050
144
+ training_args = TrainingArguments(
145
+ output_dir="./results",
146
+ num_train_epochs=3,
147
+ per_device_train_batch_size=1, # Mały batch size dla RTX 3050
148
+ gradient_accumulation_steps=4,
149
+ warmup_steps=10,
150
+ learning_rate=2e-4,
151
+ fp16=True, # Mixed precision
152
+ logging_steps=1,
153
+ save_strategy="epoch",
154
+ evaluation_strategy="no",
155
+ dataloader_num_workers=0, # Avoid multiprocessing issues
156
+ remove_unused_columns=False,
157
+ )
158
+
159
+ # 6. Trainer
160
+ trainer = Trainer(
161
+ model=model,
162
+ args=training_args,
163
+ train_dataset=tokenized_dataset,
164
+ tokenizer=tokenizer,
165
+ )
166
+
167
+ # 7. Train!
168
+ print("🚀 Starting fine-tuning...")
169
+ trainer.train()
170
+
171
+ # 8. Save model
172
+ model.save_pretrained("./fine_tuned_model")
173
+ tokenizer.save_pretrained("./fine_tuned_model")
174
+
175
+ print("✅ Fine-tuning completed! Model saved to ./fine_tuned_model")
176
+
177
+ return model, tokenizer
178
+
179
+ # === 3. KONWERSJA DO GGUF ===
180
+
181
+ def convert_to_gguf():
182
+ """Konwertuje model do formatu GGUF dla Ollama"""
183
+
184
+ print("🔄 Converting to GGUF format...")
185
+
186
+ # Ten skrypt wymaga llama.cpp
187
+ conversion_script = """
188
+ #!/bin/bash
189
+
190
+ # Pobierz llama.cpp jeśli nie masz
191
+ if [ ! -d "llama.cpp" ]; then
192
+ git clone https://github.com/ggerganov/llama.cpp.git
193
+ cd llama.cpp
194
+ make -j
195
+ cd ..
196
+ fi
197
+
198
+ # Konwertuj model
199
+ python llama.cpp/convert.py ./fine_tuned_model --outtype f16 --outfile my_custom_model.gguf
200
+
201
+ echo "✅ GGUF conversion completed: my_custom_model.gguf"
202
+ """
203
+
204
+ with open("convert_to_gguf.sh", "w") as f:
205
+ f.write(conversion_script)
206
+
207
+ os.chmod("convert_to_gguf.sh", 0o755)
208
+
209
+ print("📝 Created convert_to_gguf.sh script")
210
+ print("Run: ./convert_to_gguf.sh")
211
+
212
+ # === 4. TWORZENIE MODELFILE DLA OLLAMA ===
213
+
214
+ def create_ollama_modelfile():
215
+ """Tworzy Modelfile dla Ollama"""
216
+
217
+ modelfile_content = '''FROM ./my_custom_model.gguf
218
+
219
+ # Model metadata
220
+ PARAMETER temperature 0.7
221
+ PARAMETER top_p 0.9
222
+ PARAMETER top_k 40
223
+ PARAMETER num_ctx 2048
224
+
225
+ # System prompt
226
+ SYSTEM "Jesteś pomocnym asystentem AI stworzonym specjalnie dla polskich użytkowników.\nOdpowiadasz w języku polskim, jesteś precyzyjny i pomocny.\nSpecjalizujesz się w programowaniu, technologii i sztucznej inteligencji."
227
+
228
+ # Chat template dla Mistral
229
+ TEMPLATE "<s>[INST] {{ if .System }}{{ .System }}{{ end }}{{ .Prompt }} [/INST] {{ .Response }}</s>"
230
+
231
+ # Metadata
232
+ PARAMETER num_predict 256
233
+ PARAMETER stop "<s>"
234
+ PARAMETER stop "[INST]"
235
+ PARAMETER stop "[/INST]"
236
+ '''
237
+
238
+ with open("Modelfile", "w", encoding="utf-8") as f:
239
+ f.write(modelfile_content)
240
+ print("✅ Utworzono Modelfile dla Ollama")
241
+ print("✅ Created Modelfile for Ollama")
242
+
243
+ # === 5. PUBLIKACJA MODELU ===
244
+
245
+ def create_model_in_ollama():
246
+ """Tworzy model w Ollama"""
247
+
248
+ ollama_commands = """
249
+ # 1. Utwórz model w Ollama
250
+ ollama create wronai -f Modelfile
251
+
252
+ # 2. Test modelu
253
+ ollama run wronai "Cześć! Kim jesteś?"
254
+
255
+ # 3. Push do Ollama Library (wymaga konta)
256
+ ollama push wronai
257
+
258
+ # 4. Alternatywnie - export do pliku
259
+ ollama save wronai wronai-model.tar
260
+ """
261
+
262
+ with open("ollama_commands.sh", "w") as f:
263
+ f.write(ollama_commands)
264
+
265
+ print("✅ Created ollama_commands.sh")
266
+
267
+ # === 6. PUBLIKACJA NA HUGGING FACE ===
268
+
269
+ def create_hf_publish_script():
270
+ """Skrypt do publikacji na Hugging Face"""
271
+
272
+ hf_script = '''#!/usr/bin/env python3
273
+ """
274
+ Publikacja modelu na Hugging Face Hub
275
+ """
276
+
277
+ from huggingface_hub import HfApi, create_repo
278
+ import os
279
+
280
+ def publish_to_hf():
281
+ # Konfiguracja
282
+ model_name = "your-username/my-custom-mistral-7b"
283
+
284
+ # Login (wymagany HF token)
285
+ # huggingface-cli login
286
+
287
+ # Utwórz repo
288
+ api = HfApi()
289
+
290
+ try:
291
+ create_repo(
292
+ repo_id=model_name,
293
+ repo_type="model",
294
+ private=False # Ustaw True dla prywatnego
295
+ )
296
+ print(f"✅ Repository created: {model_name}")
297
+ except Exception as e:
298
+ print(f"Repository may already exist: {e}")
299
+
300
+ # Upload plików
301
+ api.upload_folder(
302
+ folder_path="./fine_tuned_model",
303
+ repo_id=model_name,
304
+ commit_message="Initial model upload"
305
+ )
306
+
307
+ # Upload the GGUF file (if it exists)
308
+ if os.path.exists("my_custom_model.gguf"):
309
+ api.upload_file(
310
+ path_or_fileobj="my_custom_model.gguf",
311
+ path_in_repo="my_custom_model.gguf",
312
+ repo_id=model_name,
313
+ commit_message="Add GGUF version"
314
+ )
315
+
316
+ print(f"🎉 Model published: https://huggingface.co/{model_name}")
317
+
318
+ if __name__ == "__main__":
319
+ publish_to_hf()
320
+ '''
321
+
322
+ with open("publish_to_hf.py", "w") as f:
323
+ f.write(hf_script)
324
+
325
+ print("✅ Created publish_to_hf.py")
326
+
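+ # Editor's note: publishing requires an authenticated Hugging Face session, e.g.
+ # `huggingface-cli login` or, in Python, `from huggingface_hub import login; login()`.
+ # The repo id "your-username/my-custom-mistral-7b" in the generated script is a
+ # placeholder and must be changed before running it.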
327
+ # === MAIN FUNCTION ===
328
+
329
+ def main():
330
+ """Pełny pipeline tworzenia własnego modelu"""
331
+
332
+ print("🎯 Custom LLM Creation Pipeline")
333
+ print("===============================")
334
+
335
+ choice = input("""
336
+ Choose an option:
337
+ 1. Create a sample dataset
338
+ 2. Fine-tune the model (requires a GPU)
339
+ 3. Convert to GGUF
340
+ 4. Create a Modelfile for Ollama
341
+ 5. Prepare the publishing scripts
342
+ 6. Full pipeline (1-5)
343
+
344
+ Choice (1-6): """).strip()
345
+
346
+ if choice == "1":
347
+ create_sample_dataset()
348
+ elif choice == "2":
349
+ fine_tune_model()
350
+ elif choice == "3":
351
+ convert_to_gguf()
352
+ elif choice == "4":
353
+ create_ollama_modelfile()
354
+ elif choice == "5":
355
+ create_hf_publish_script()
356
+ elif choice == "6":
357
+ print("🚀 Running full pipeline...")
358
+ create_sample_dataset()
359
+
360
+ if input("Continue with fine-tuning? (y/N): ").lower() == 'y':
361
+ fine_tune_model()
362
+ convert_to_gguf()
363
+
364
+ create_ollama_modelfile()
365
+ create_model_in_ollama()
366
+ create_hf_publish_script()
367
+
368
+ print("✅ Full pipeline completed!")
369
+ else:
370
+ print("Invalid choice")
371
+
372
+ if __name__ == "__main__":
373
+ main()
docker-compose.yml ADDED
@@ -0,0 +1,76 @@
1
+ version: '3.8'
2
+
3
+ # Load environment variables from .env file
4
+ x-env: &env
5
+ env_file: .env
6
+ environment:
7
+ - OLLAMA_PORT=${OLLAMA_PORT:-11434}
8
+ - OLLAMA_HOST=${OLLAMA_HOST:-0.0.0.0}
9
+ - STREAMLIT_SERVER_PORT=${STREAMLIT_SERVER_PORT:-8501}
10
+ - STREAMLIT_SERVER_ADDRESS=${STREAMLIT_SERVER_ADDRESS:-0.0.0.0}
11
+ - DEFAULT_MODEL=${DEFAULT_MODEL:-mistral:7b-instruct}
12
+
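+ # Editor's note: YAML merge keys give explicitly set keys precedence, so services
+ # below that define their own `environment:` (streamlit-ui, model-setup) replace the
+ # anchor's environment list rather than extending it; only env_file is shared.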
13
+ services:
14
+ # Ollama - LLM engine (minimal configuration)
15
+ ollama:
16
+ image: ollama/ollama:latest
17
+ container_name: ollama-engine
18
+ <<: *env
19
+ ports:
20
+ - "${OLLAMA_PORT:-11434}:11434"
21
+ volumes:
22
+ - ollama_data:/root/.ollama
23
+ # deploy:
24
+ # resources:
25
+ # reservations:
26
+ # devices:
27
+ # - driver: nvidia
28
+ # count: 1
29
+ # capabilities: [gpu]
30
+ restart: unless-stopped
31
+
32
+ # Streamlit UI (Python frontend)
33
+ streamlit-ui:
34
+ build: .
35
+ container_name: llm-ui
36
+ <<: *env
37
+ ports:
38
+ - "${STREAMLIT_SERVER_PORT:-8501}:8501"
39
+ depends_on:
40
+ - ollama
41
+ environment:
42
+ - OLLAMA_URL=http://ollama:${OLLAMA_PORT:-11434}
43
+ - STREAMLIT_SERVER_PORT=${STREAMLIT_SERVER_PORT:-8501}
44
+ - STREAMLIT_SERVER_ADDRESS=${STREAMLIT_SERVER_ADDRESS:-0.0.0.0}
45
+ restart: unless-stopped
46
+ volumes:
47
+ - ./app:/app
48
+ healthcheck:
49
+ test: ["CMD", "curl", "-f", "http://localhost:${STREAMLIT_SERVER_PORT:-8501}/_stcore/health"]
50
+ interval: 30s
51
+ timeout: 10s
52
+ retries: 3
53
+
54
+ # Model Setup (model initialization)
55
+ model-setup:
56
+ image: ollama/ollama:latest
57
+ container_name: model-setup
58
+ <<: *env
59
+ depends_on:
60
+ - ollama
61
+ volumes:
62
+ - ollama_data:/root/.ollama
63
+ environment:
64
+ - OLLAMA_HOST=http://ollama:${OLLAMA_PORT:-11434}
65
+ - DEFAULT_MODEL=${DEFAULT_MODEL:-mistral:7b-instruct}
66
+ command: >
67
+ sh -c "
68
+ echo 'Waiting for Ollama to be ready...' &&
69
+ sleep 10 &&
70
+ ollama pull ${DEFAULT_MODEL:-mistral:7b-instruct} &&
71
+ echo 'Model loaded successfully!'
72
+ "
73
+ restart: "no"
74
+
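+ # Editor's note: the first run of model-setup downloads the default model
+ # (roughly 4 GB for mistral:7b-instruct), so it can take a while before the
+ # container exits successfully.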
75
+ volumes:
76
+ ollama_data:
example_training_data.json ADDED
@@ -0,0 +1,32 @@
1
+ [
2
+ {
3
+ "instruction": "Jak zoptymalizować kod Python dla machine learning?",
4
+ "input": "",
5
+ "output": "Kluczowe optymalizacje dla ML w Pythonie:\n\n1. **Vectoryzacja z NumPy/Pandas**:\n - Unikaj pętli Python, używaj operacji wektorowych\n - `df.apply()` zamiast iteracji przez wiersze\n\n2. **Wykorzystanie GPU**:\n - PyTorch/TensorFlow z CUDA\n - CuPy dla operacji NumPy na GPU\n\n3. **Multiprocessing**:\n - `multiprocessing.Pool` dla CPU-intensive tasks\n - `joblib.Parallel` dla sklearn\n\n4. **Memory management**:\n - `del` niepotrzebnych zmiennych\n - `gc.collect()` dla garbage collection\n - Chunking dla dużych datasets\n\n5. **Profiling**:\n - `cProfile` do znajdowania bottlenecków\n - `memory_profiler` dla użycia pamięci"
6
+ },
7
+ {
8
+ "instruction": "Wyjaśnij Docker dla początkujących",
9
+ "input": "chcę zrozumieć podstawy",
10
+ "output": "Docker to narzędzie do konteneryzacji aplikacji. Podstawowe koncepty:\n\n**Co to jest kontener?**\n- Lekka, przenośna \"paczka\" z aplikacją i wszystkimi jej zależnościami\n- Działa identycznie na każdym systemie\n\n**Podstawowe komendy:**\n```bash\n# Uruchom kontener\ndocker run nginx\n\n# Lista działających kontenerów\ndocker ps\n\n# Zbuduj obraz z Dockerfile\ndocker build -t myapp .\n\n# Zatrzymaj kontener\ndocker stop <container_id>\n```\n\n**Dockerfile przykład:**\n```dockerfile\nFROM python:3.11\nCOPY . /app\nWORKDIR /app\nRUN pip install -r requirements.txt\nCMD [\"python\", \"app.py\"]\n```\n\n**Zalety:**\n- Spójność środowiska (dev = prod)\n- Łatwe deployowanie\n- Izolacja aplikacji\n- Skalowalność"
11
+ },
12
+ {
13
+ "instruction": "Jak stworzyć REST API w FastAPI?",
14
+ "input": "podstawowy przykład",
15
+ "output": "Oto kompletny przykład REST API w FastAPI:\n\n```python\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\nfrom typing import List, Optional\n\napp = FastAPI(title=\"My API\", version=\"1.0.0\")\n\n# Model danych\nclass Item(BaseModel):\n id: Optional[int] = None\n name: str\n price: float\n description: Optional[str] = None\n\n# \"Baza danych\"\nitems = []\n\n# Endpoints\n@app.get(\"/\")\ndef read_root():\n return {\"message\": \"Hello World\"}\n\n@app.get(\"/items\", response_model=List[Item])\ndef get_items():\n return items\n\n@app.get(\"/items/{item_id}\", response_model=Item)\ndef get_item(item_id: int):\n item = next((x for x in items if x.id == item_id), None)\n if not item:\n raise HTTPException(status_code=404, detail=\"Item not found\")\n return item\n\n@app.post(\"/items\", response_model=Item)\ndef create_item(item: Item):\n item.id = len(items) + 1\n items.append(item)\n return item\n\n@app.put(\"/items/{item_id}\", response_model=Item)\ndef update_item(item_id: int, updated_item: Item):\n for i, item in enumerate(items):\n if item.id == item_id:\n updated_item.id = item_id\n items[i] = updated_item\n return updated_item\n raise HTTPException(status_code=404, detail=\"Item not found\")\n\n@app.delete(\"/items/{item_id}\")\ndef delete_item(item_id: int):\n global items\n items = [x for x in items if x.id != item_id]\n return {\"message\": \"Item deleted\"}\n\n# Uruchomienie\nif __name__ == \"__main__\":\n import uvicorn\n uvicorn.run(app, host=\"0.0.0.0\", port=8000)\n```\n\n**Uruchomienie:**\n```bash\npip install fastapi uvicorn\npython main.py\n```\n\n**Automatyczna dokumentacja:**\n- Swagger UI: http://localhost:8000/docs\n- ReDoc: http://localhost:8000/redoc"
16
+ },
17
+ {
18
+ "instruction": "Wyjaśnij różnice między AI, ML i Deep Learning",
19
+ "input": "",
20
+ "output": "**Hierarchia pojęć (od najszerszego):**\n\n🧠 **Artificial Intelligence (AI)**\n- Najszersze pojęcie\n- Systemy zdolne do wykonywania zadań wymagających \"inteligencji\"\n- Przykłady: chatboty, systemy eksperckie, algorytmy gier\n\n🤖 **Machine Learning (ML) ⊂ AI**\n- Podzbiór AI\n- Algorytmy uczące się z danych bez jawnego programowania\n- Główne typy:\n - **Supervised**: uczenie z etykietowanymi danymi\n - **Unsupervised**: znajdowanie wzorców w danych\n - **Reinforcement**: uczenie przez nagrody/kary\n\n🧬 **Deep Learning (DL) ⊂ ML**\n- Podzbiór ML\n- Sieci neuronowe z wieloma warstwami (\"głębokie\")\n- Automatyczne wyodrębnianie cech z danych\n- Przykłady: CNN (obrazy), RNN (sekwencje), Transformers (tekst)\n\n**Praktyczne różnice:**\n\n| Aspekt | AI | ML | Deep Learning |\n|--------|----|----|---------------|\n| **Dane** | Reguły+dane | Duże zbiory | Bardzo duże zbiory |\n| **Wydajność** | Zależna od reguł | Dobra | Najlepsza (z danymi) |\n| **Interpretacja** | Jasna | Średnia | Trudna (black box) |\n| **Moc obliczeniowa** | Niska | Średnia | Wysoka |\n\n**Przykłady:**\n- **AI bez ML**: System ekspercki w medycynie (reguły if-then)\n- **ML bez DL**: Drzewo decyzyjne, SVM, regresja\n- **Deep Learning**: GPT, BERT, rozpoznawanie obrazów"
21
+ },
22
+ {
23
+ "instruction": "Jak debugging w Pythonie?",
24
+ "input": "najlepsze praktyki",
25
+ "output": "**Kompletny guide do debuggingu w Pythonie:**\n\n## 1. **Print debugging (podstawa)**\n```python\n# Klasyczne print\nprint(f\"Zmienna x = {x}, typ: {type(x)}\")\n\n# Lepsze print z kontekstem\nprint(f\"DEBUG [funkcja_name:42]: x={x}\")\n\n# Print z formatowaniem JSON\nimport json\nprint(json.dumps(data, indent=2, ensure_ascii=False))\n```\n\n## 2. **Python Debugger (pdb)**\n```python\n# Wstaw breakpoint\nimport pdb; pdb.set_trace()\n\n# Python 3.7+\nbreakpoint() # Equivalent do pdb.set_trace()\n\n# Komendy w pdb:\n# n (next line)\n# s (step into)\n# c (continue)\n# l (list code)\n# p variable_name (print variable)\n# pp variable_name (pretty print)\n# q (quit)\n```\n\n## 3. **Logging (profesjonalne)**\n```python\nimport logging\n\n# Konfiguracja\nlogging.basicConfig(\n level=logging.DEBUG,\n format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'\n)\n\nlogger = logging.getLogger(__name__)\n\n# Użycie\nlogger.debug(\"Szczegółowe info dla debuggingu\")\nlogger.info(\"Ogólne informacje\")\nlogger.warning(\"Ostrzeżenie\")\nlogger.error(\"Błąd\")\nlogger.critical(\"Krytyczny błąd\")\n```\n\n## 4. **IDE Debuggers**\n- **VS Code**: F5 → Run and Debug\n- **PyCharm**: Klik na marginesie → Add Breakpoint\n- **Jupyter**: `%debug` magic command\n\n## 5. **Specialized Tools**\n```python\n# Memory profiling\nfrom memory_profiler import profile\n\n@profile\ndef my_function():\n # kod do profilowania\n pass\n\n# Performance profiling\nimport cProfile\ncProfile.run('my_function()')\n\n# Exception debugging\nimport traceback\ntry:\n risky_code()\nexcept Exception as e:\n traceback.print_exc()\n # lub logger.exception(\"Error occurred\")\n```\n\n## 6. **Best Practices**\n- **Używaj assertions**: `assert x > 0, \"x must be positive\"`\n- **Type hints**: Pomaga IDE w debuggingu\n- **Unit testy**: Wczesne wykrywanie błędów\n- **Linting**: `flake8`, `pylint`, `black`\n- **Exception handling**: Zawsze obsługuj wyjątki\n\n## 7. **Quick Debug Tricks**\n```python\n# Inspect object\nvars(object) # Wszystkie atrybuty\ndir(object) # Metody i atrybuty\n\n# Check if variable exists\nif 'variable' in locals():\n print(variable)\n\n# Pretty print data structures\nfrom pprint import pprint\npprint(complex_dict)\n```"
26
+ },
27
+ {
28
+ "instruction": "Jak zacząć z RTX 3050 i machine learning?",
29
+ "input": "pierwszy projekt",
30
+ "output": "**Kompletny przewodnik ML na RTX 3050:**\n\n## 🎯 **Setup środowiska**\n\n```bash\n# 1. CUDA i cuDNN\n# Pobierz CUDA 11.8 z nvidia.com\n# Sprawdź: nvidia-smi\n\n# 2. Python environment\nconda create -n ml python=3.10\nconda activate ml\n\n# 3. PyTorch z CUDA\npip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n\n# 4. ML libraries\npip install pandas numpy matplotlib scikit-learn jupyter\n\n# Test GPU\npython -c \"import torch; print(f'CUDA: {torch.cuda.is_available()}')\"\n```\n\n## 🚀 **Pierwszy projekt: Klasyfikacja obrazów**\n\n```python\nimport torch\nimport torch.nn as nn\nimport torchvision\nfrom torch.utils.data import DataLoader\nimport matplotlib.pyplot as plt\n\n# 1. Sprawdź GPU\ndevice = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\nprint(f'Using device: {device}')\n\n# 2. Dataset (CIFAR-10)\ntransform = torchvision.transforms.Compose([\n torchvision.transforms.ToTensor(),\n torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n])\n\ntrain_dataset = torchvision.datasets.CIFAR10(\n root='./data', train=True, download=True, transform=transform\n)\ntest_dataset = torchvision.datasets.CIFAR10(\n root='./data', train=False, download=True, transform=transform\n)\n\n# 3. DataLoaders (optymalizowane dla RTX 3050)\ntrain_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)\ntest_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)\n\n# 4. Prosty CNN model\nclass SimpleCNN(nn.Module):\n def __init__(self):\n super(SimpleCNN, self).__init__()\n self.conv1 = nn.Conv2d(3, 32, 3, padding=1)\n self.conv2 = nn.Conv2d(32, 64, 3, padding=1)\n self.pool = nn.MaxPool2d(2, 2)\n self.fc1 = nn.Linear(64 * 8 * 8, 512)\n self.fc2 = nn.Linear(512, 10)\n self.dropout = nn.Dropout(0.5)\n\n def forward(self, x):\n x = self.pool(torch.relu(self.conv1(x)))\n x = self.pool(torch.relu(self.conv2(x)))\n x = x.view(-1, 64 * 8 * 8)\n x = torch.relu(self.fc1(x))\n x = self.dropout(x)\n x = self.fc2(x)\n return x\n\n# 5. Training setup\nmodel = SimpleCNN().to(device)\ncriterion = nn.CrossEntropyLoss()\noptimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n\n# 6. Training loop\nnum_epochs = 5\nfor epoch in range(num_epochs):\n model.train()\n running_loss = 0.0\n \n for i, (images, labels) in enumerate(train_loader):\n images, labels = images.to(device), labels.to(device)\n \n optimizer.zero_grad()\n outputs = model(images)\n loss = criterion(outputs, labels)\n loss.backward()\n optimizer.step()\n \n running_loss += loss.item()\n \n if (i+1) % 100 == 0:\n print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}], Loss: {loss.item():.4f}')\n \n print(f'Epoch [{epoch+1}] Average Loss: {running_loss/len(train_loader):.4f}')\n\n# 7. Evaluation\nmodel.eval()\ncorrect = 0\ntotal = 0\n\nwith torch.no_grad():\n for images, labels in test_loader:\n images, labels = images.to(device), labels.to(device)\n outputs = model(images)\n _, predicted = torch.max(outputs.data, 1)\n total += labels.size(0)\n correct += (predicted == labels).sum().item()\n\nprint(f'Test Accuracy: {100 * correct / total:.2f}%')\n```\n\n## ⚡ **Optymalizacje dla RTX 3050**\n\n```python\n# 1. Mixed Precision Training (oszczędza VRAM)\nfrom torch.cuda.amp import autocast, GradScaler\n\nscaler = GradScaler()\n\n# W training loop:\nwith autocast():\n outputs = model(images)\n loss = criterion(outputs, labels)\n\nscaler.scale(loss).backward()\nscaler.step(optimizer)\nscaler.update()\n\n# 2. 
Gradient Checkpointing\nmodel.gradient_checkpointing_enable()\n\n# 3. DataLoader optymalizacje\ntrain_loader = DataLoader(\n dataset, \n batch_size=32, # Zmniejsz jeśli brakuje VRAM\n num_workers=2, # 2-4 dla RTX 3050\n pin_memory=True, # Szybszy transfer CPU->GPU\n persistent_workers=True # PyTorch 1.7+\n)\n```\n\n## 📊 **Monitoring GPU**\n\n```python\n# Memory monitoring\ndef print_gpu_memory():\n if torch.cuda.is_available():\n print(f'GPU Memory: {torch.cuda.memory_allocated()/1024**3:.1f}GB / {torch.cuda.max_memory_allocated()/1024**3:.1f}GB')\n torch.cuda.reset_peak_memory_stats()\n\n# Użyj po każdej epoce\nprint_gpu_memory()\n```\n\n## 🎯 **Następne projekty:**\n\n1. **NLP**: Fine-tuning BERT na polskich tekstach\n2. **Computer Vision**: Object detection z YOLO\n3. **Time Series**: Predykcja cen akcji z LSTM\n4. **Generative**: Treninig mini-GAN\n5. **Reinforcement Learning**: Gra w Atari\n\n## 💡 **Tips dla RTX 3050:**\n\n- **Batch size**: Zacznij od 32-64, dostosuj do VRAM\n- **Model size**: Używaj mniejszych modeli (ResNet18 zamiast ResNet152)\n- **Mixed precision**: Zawsze włączaj dla większych modeli\n- **Transfer learning**: Używaj pre-trained modeli zamiast trenować od zera\n- **Gradient accumulation**: Jeśli musisz mały batch size\n\n**RTX 3050 = świetna karta do nauki ML! 8GB VRAM wystarczy na większość projektów edukacyjnych.**"
31
+ }
32
+ ]
model_requirements.txt ADDED
@@ -0,0 +1,19 @@
1
+ # Fine-tuning requirements
2
+ torch>=2.2.2
3
+ transformers>=4.39.0
4
+ datasets>=2.18.0
5
+ peft>=0.10.0
6
+ accelerate>=0.29.0
7
+ bitsandbytes>=0.43.0
8
+ huggingface_hub>=0.22.0
9
+
10
+ # For data processing
11
+ pandas>=2.2.0
12
+ numpy>=1.26.0
13
+
14
+ # For model publishing
15
+ huggingface_hub[cli]>=0.22.0
16
+
17
+ # Optional: for advanced training
18
+ wandb>=0.17.0
19
+ tensorboard>=2.16.0
quick-start.sh ADDED
@@ -0,0 +1,131 @@
1
+ #!/bin/bash
2
+
3
+ # 🚀 Minimal LLM Setup - Everything in one script!
4
+
5
+ set -e
6
+
7
+ # Colors
8
+ RED='\033[0;31m'
9
+ GREEN='\033[0;32m'
10
+ YELLOW='\033[1;33m'
11
+ BLUE='\033[0;34m'
12
+ NC='\033[0m' # No Color
13
+
14
+ echo -e "${BLUE}🚀 Minimal LLM Setup${NC}"
15
+ echo "===================="
16
+
17
+ # Check Docker
18
+ if ! command -v docker &> /dev/null; then
19
+ echo -e "${RED}❌ Docker not found. Please install Docker first.${NC}"
20
+ exit 1
21
+ fi
22
+
23
+ # Check Docker Compose
24
+ if ! docker compose version &> /dev/null; then
25
+ echo -e "${RED}❌ Docker Compose not found. Please install Docker Compose.${NC}"
26
+ exit 1
27
+ fi
28
+
29
+ # Check NVIDIA Docker (optional)
30
+ if docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi &>/dev/null; then
31
+ echo -e "${GREEN}✅ NVIDIA Docker detected${NC}"
32
+ GPU_AVAILABLE=true
33
+ else
34
+ echo -e "${YELLOW}⚠️ No GPU detected, running on CPU${NC}"
35
+ GPU_AVAILABLE=false
36
+ fi
37
+
38
+ # Create project structure
39
+ echo -e "${BLUE}📁 Creating project structure...${NC}"
40
+ mkdir -p app
41
+
42
+ # Create minimal Streamlit app if it doesn't exist
43
+ if [ ! -f "app/main.py" ]; then
44
+ echo -e "${BLUE}📝 Creating Streamlit app...${NC}"
45
+ # The file content would be copied here in real scenario
46
+ echo "# Streamlit app created. Copy the main.py content here."
47
+ fi
48
+
49
+ # Modify docker-compose for CPU if no GPU
50
+ if [ "$GPU_AVAILABLE" = false ]; then
51
+ echo -e "${YELLOW}🔧 Configuring for CPU mode...${NC}"
52
+ sed -i 's/deploy:/# deploy:/g' docker-compose.yml || true
53
+ sed -i 's/resources:/# resources:/g' docker-compose.yml || true
54
+ sed -i 's/reservations:/# reservations:/g' docker-compose.yml || true
55
+ sed -i 's/devices:/# devices:/g' docker-compose.yml || true
56
+ sed -i 's/- driver: nvidia/# - driver: nvidia/g' docker-compose.yml || true
57
+ sed -i 's/count: 1/# count: 1/g' docker-compose.yml || true
58
+ sed -i 's/capabilities: \[gpu\]/# capabilities: [gpu]/g' docker-compose.yml || true
59
+ fi
60
+
61
+ # Build and start services
62
+ echo -e "${BLUE}🔨 Building and starting services...${NC}"
63
+ docker compose up --build -d
64
+
65
+ # Wait for services
66
+ echo -e "${BLUE}⏳ Waiting for services to start...${NC}"
67
+
68
+ # Wait for Ollama
69
+ echo -n "Waiting for Ollama"
70
+ for i in {1..30}; do
71
+ if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
72
+ echo -e "${GREEN} ✅${NC}"
73
+ break
74
+ fi
75
+ echo -n "."
76
+ sleep 2
77
+ done
78
+
79
+ # Wait for Streamlit
80
+ echo -n "Waiting for Streamlit"
81
+ for i in {1..30}; do
82
+ if curl -s http://localhost:8501/_stcore/health > /dev/null 2>&1; then
83
+ echo -e "${GREEN} ✅${NC}"
84
+ break
85
+ fi
86
+ echo -n "."
87
+ sleep 2
88
+ done
89
+
90
+ # Check if model download completed
91
+ echo -e "${BLUE}📥 Checking model download...${NC}"
92
+ docker logs model-setup | tail -5
93
+
94
+ echo
95
+ echo -e "${GREEN}🎉 Setup completed!${NC}"
96
+ echo "==================="
97
+ echo
98
+ echo -e "${BLUE}📍 Access points:${NC}"
99
+ echo " • Streamlit UI: http://localhost:8501"
100
+ echo " • Ollama API: http://localhost:11434"
101
+ echo
102
+ echo -e "${BLUE}🔍 Useful commands:${NC}"
103
+ echo " • Check logs: docker compose logs -f"
104
+ echo " • Stop services: docker compose down"
105
+ echo " • Restart: docker compose restart"
106
+ echo " • Shell access: docker exec -it ollama-engine bash"
107
+ echo
108
+ echo -e "${BLUE}🧪 Test API:${NC}"
109
+ echo ' curl -X POST http://localhost:11434/api/generate \'
110
+ echo ' -H "Content-Type: application/json" \'
111
+ echo ' -d '"'"'{"model": "mistral:7b-instruct", "prompt": "Hello!"}'\'
112
+
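+ # Editor's note: the newer /api/chat endpoint works the same way, e.g.:
+ #   curl -X POST http://localhost:11434/api/chat \
+ #     -d '{"model": "mistral:7b-instruct", "messages": [{"role": "user", "content": "Hello!"}]}'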
113
+ # Auto-open browser (optional)
114
+ if command -v xdg-open &> /dev/null; then
115
+ echo
116
+ read -p "Open browser automatically? (y/N): " -n 1 -r
117
+ echo
118
+ if [[ $REPLY =~ ^[Yy]$ ]]; then
119
+ xdg-open http://localhost:8501
120
+ fi
121
+ elif command -v open &> /dev/null; then
122
+ echo
123
+ read -p "Open browser automatically? (y/N): " -n 1 -r
124
+ echo
125
+ if [[ $REPLY =~ ^[Yy]$ ]]; then
126
+ open http://localhost:8501
127
+ fi
128
+ fi
129
+
130
+ echo
131
+ echo -e "${GREEN}Happy chatting! 🤖${NC}"
requirements.txt ADDED
@@ -0,0 +1,3 @@
1
+ streamlit==1.29.0
2
+ requests==2.31.0
3
+ ollama==0.1.7
test_converted_model.py ADDED
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ 🧪 Test script for converted GGUF model
4
+ Tests both llama.cpp and Ollama integration
5
+ """
6
+
7
+ import os
8
+ import subprocess
9
+ import time
10
+ import requests
11
+ import json
12
+ from pathlib import Path
13
+
14
+
15
+ def test_llamacpp_direct():
16
+ """Test model directly with llama.cpp"""
17
+ print("🧪 Testing with llama.cpp directly...")
18
+
19
+ model_file = "my_custom_model.gguf"
20
+ if not os.path.exists(model_file):
21
+ print(f"❌ Model file not found: {model_file}")
22
+ return False
23
+
24
+ llamacpp_main = "./llama.cpp/main"
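+ # Editor's note: newer llama.cpp builds name this binary "llama-cli" (often under
+ # build/bin/); adjust llamacpp_main if a plain "main" binary is not produced.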
25
+ if not os.path.exists(llamacpp_main):
26
+ print(f"❌ llama.cpp main not found: {llamacpp_main}")
27
+ print("Run: ./convert_to_gguf.sh first")
28
+ return False
29
+
30
+ test_prompts = [
31
+ "Hello, how are you?",
32
+ "Wyjaśnij co to jest Docker",
33
+ "Napisz prostą funkcję w Pythonie"
34
+ ]
35
+
36
+ for i, prompt in enumerate(test_prompts, 1):
37
+ print(f"\n--- Test {i}/3: {prompt[:30]}... ---")
38
+
39
+ cmd = [
40
+ llamacpp_main,
41
+ "-m", model_file,
42
+ "-p", prompt,
43
+ "-n", "100",
44
+ "--temp", "0.7",
45
+ "--top-p", "0.9"
46
+ ]
47
+
48
+ try:
49
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
50
+ if result.returncode == 0:
51
+ print("✅ Response generated successfully")
52
+ print("Response preview:", result.stdout[:200] + "..." if len(result.stdout) > 200 else result.stdout)
53
+ else:
54
+ print(f"❌ Error: {result.stderr}")
55
+ return False
56
+ except subprocess.TimeoutExpired:
57
+ print("⏰ Timeout - model may be too slow")
58
+ return False
59
+ except Exception as e:
60
+ print(f"❌ Exception: {e}")
61
+ return False
62
+
63
+ return True
64
+
65
+
66
+ def test_ollama_integration():
67
+ """Test model through Ollama"""
68
+ print("\n🤖 Testing Ollama integration...")
69
+
70
+ # Check if Ollama is installed
71
+ try:
72
+ result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
73
+ if result.returncode != 0:
74
+ print("❌ Ollama not installed or not running")
75
+ return False
76
+ except FileNotFoundError:
77
+ print("❌ Ollama command not found")
78
+ return False
79
+
80
+ model_name = "my-custom-model"
81
+
82
+ # Check if our custom model exists in Ollama
83
+ if model_name not in result.stdout:
84
+ print(f"⚠️ Model '{model_name}' not found in Ollama")
85
+ print("Create it first:")
86
+ print("1. ollama create my-custom-model -f Modelfile")
87
+ return False
88
+
89
+ print(f"✅ Found model: {model_name}")
90
+
91
+ # Test through Ollama API
92
+ test_prompts = [
93
+ "Cześć! Kim jesteś?",
94
+ "Jak zoptymalizować kod Python?",
95
+ "Co to jest machine learning?"
96
+ ]
97
+
98
+ for i, prompt in enumerate(test_prompts, 1):
99
+ print(f"\n--- Ollama Test {i}/3: {prompt[:30]}... ---")
100
+
101
+ try:
102
+ # Test via CLI
103
+ cmd = ["ollama", "run", model_name, prompt]
104
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
105
+
106
+ if result.returncode == 0:
107
+ print("✅ Ollama CLI response successful")
108
+ print("Response preview:", result.stdout[:200] + "..." if len(result.stdout) > 200 else result.stdout)
109
+ else:
110
+ print(f"❌ Ollama CLI error: {result.stderr}")
111
+ continue
112
+
113
+ except subprocess.TimeoutExpired:
114
+ print("⏰ Ollama timeout")
115
+ continue
116
+ except Exception as e:
117
+ print(f"❌ Ollama exception: {e}")
118
+ continue
119
+
120
+ # Test via API
121
+ print("\n🌐 Testing Ollama API...")
122
+ try:
123
+ api_url = "http://localhost:11434/api/generate"
124
+ test_data = {
125
+ "model": model_name,
126
+ "prompt": "Hello! Test API call.",
127
+ "stream": False
128
+ }
129
+
130
+ response = requests.post(api_url, json=test_data, timeout=60)
131
+ if response.status_code == 200:
132
+ data = response.json()
133
+ print("✅ Ollama API response successful")
134
+ print("API Response:", data.get('response', 'No response field')[:100])
135
+ else:
136
+ print(f"❌ API Error: {response.status_code}")
137
+ return False
138
+
139
+ except requests.exceptions.RequestException as e:
140
+ print(f"❌ API Request failed: {e}")
141
+ return False
142
+
143
+ return True
144
+
145
+
146
+ def benchmark_model():
147
+ """Simple benchmark of the model"""
148
+ print("\n📊 Running simple benchmark...")
149
+
150
+ model_file = "my_custom_model.gguf"
151
+ if not os.path.exists(model_file):
152
+ print("❌ Model file not found for benchmark")
153
+ return
154
+
155
+ # Get file size
156
+ file_size = os.path.getsize(model_file) / (1024 ** 3) # GB
157
+ print(f"📁 Model size: {file_size:.2f} GB")
158
+
159
+ # Benchmark prompt
160
+ benchmark_prompt = "Explain artificial intelligence in simple terms."
161
+
162
+ llamacpp_main = "./llama.cpp/main"
163
+ if os.path.exists(llamacpp_main):
164
+ print("⏱️ Timing generation speed...")
165
+
166
+ cmd = [
167
+ llamacpp_main,
168
+ "-m", model_file,
169
+ "-p", benchmark_prompt,
170
+ "-n", "100",
171
+ "--temp", "0.7"
172
+ ]
173
+
174
+ start_time = time.time()
175
+ try:
176
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
177
+ end_time = time.time()
178
+
179
+ if result.returncode == 0:
180
+ duration = end_time - start_time
181
+ # Rough tokens estimation
182
+ tokens = len(result.stdout.split())
183
+ tokens_per_second = tokens / duration if duration > 0 else 0
184
+
185
+ print(f"⚡ Generation time: {duration:.2f} seconds")
186
+ print(f"🚀 Speed: ~{tokens_per_second:.1f} tokens/second")
187
+ print(f"📝 Generated tokens: ~{tokens}")
188
+ else:
189
+ print("❌ Benchmark failed")
190
+ except subprocess.TimeoutExpired:
191
+ print("⏰ Benchmark timeout")
192
+
193
+
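+ # Editor's note: llama.cpp also prints its own timing summary (prompt eval and eval
+ # tokens per second) to stderr, which is more precise than the word-count estimate above.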
194
+ def main():
195
+ """Main test runner"""
196
+ print("🧪 Custom Model Test Suite")
197
+ print("=" * 40)
198
+
199
+ # Check prerequisites
200
+ print("🔍 Checking prerequisites...")
201
+
202
+ required_files = [
203
+ "my_custom_model.gguf",
204
+ "./llama.cpp/main",
205
+ "Modelfile"
206
+ ]
207
+
208
+ missing_files = [f for f in required_files if not os.path.exists(f)]
209
+
210
+ if missing_files:
211
+ print("❌ Missing required files:")
212
+ for f in missing_files:
213
+ print(f" • {f}")
214
+ print("\nRun these commands first:")
215
+ print("1. python create_custom_model.py # fine-tune model")
216
+ print("2. ./convert_to_gguf.sh # convert to GGUF")
217
+ print("3. ollama create my-custom-model -f Modelfile # import to Ollama")
218
+ return
219
+
220
+ print("✅ All required files found")
221
+
222
+ # Run tests
223
+ tests_passed = 0
224
+ total_tests = 3
225
+
226
+ # Test 1: Direct llama.cpp
227
+ if test_llamacpp_direct():
228
+ tests_passed += 1
229
+ print("✅ llama.cpp test PASSED")
230
+ else:
231
+ print("❌ llama.cpp test FAILED")
232
+
233
+ # Test 2: Ollama integration
234
+ if test_ollama_integration():
235
+ tests_passed += 1
236
+ print("✅ Ollama test PASSED")
237
+ else:
238
+ print("❌ Ollama test FAILED")
239
+
240
+ # Test 3: Benchmark
241
+ benchmark_model()
242
+ tests_passed += 1 # Benchmark always "passes"
243
+
244
+ # Results
245
+ print("\n" + "=" * 40)
246
+ print(f"🎯 Test Results: {tests_passed}/{total_tests} passed")
247
+
248
+ if tests_passed == total_tests:
249
+ print("🎉 All tests passed! Your custom model is ready!")
250
+ print("\n🚀 Next steps:")
251
+ print("• ollama push my-custom-model # Share with the world")
252
+ print("• Integrate into your applications")
253
+ print("• Fine-tune further with more data")
254
+ else:
255
+ print("⚠️ Some tests failed. Check the output above.")
256
+
257
+ # Usage examples
258
+ print("\n📚 Usage Examples:")
259
+ print("# Ollama CLI:")
260
+ print("ollama run my-custom-model 'Your question here'")
261
+ print("\n# Ollama API:")
262
+ print("curl -X POST http://localhost:11434/api/generate \\")
263
+ print(" -H 'Content-Type: application/json' \\")
264
+ print(" -d '{\"model\": \"my-custom-model\", \"prompt\": \"Hello!\"}'")
265
+
266
+ print("\n# Python integration:")
267
+ print("import ollama")
268
+ print("response = ollama.chat(model='my-custom-model', messages=[")
269
+ print(" {'role': 'user', 'content': 'Hello!'}])")
270
+
271
+
272
+ if __name__ == "__main__":
273
+ main()