softreck committed
Commit 0fa4cc9 · verified · 1 Parent(s): cc87b41

Upload folder using huggingface_hub
.env.example ADDED
@@ -0,0 +1,33 @@
+ # Ollama Configuration
+ OLLAMA_HOST=0.0.0.0
+ OLLAMA_PORT=11436
+
+ # Streamlit Configuration
+ STREAMLIT_SERVER_PORT=8501
+ STREAMLIT_SERVER_ADDRESS=0.0.0.0
+ STREAMLIT_SERVER_HEADLESS=true
+ STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
+
+ # Application Settings
+ APP_DEBUG=false
+ APP_ENV=development
+ APP_SECRET_KEY=your-secret-key-here
+
+ # Model Configuration
+ DEFAULT_MODEL=mistral:7b-instruct
+ MODEL_TEMPERATURE=0.7
+ MAX_TOKENS=2000
+
+ # API Configuration (if needed)
+ # API_KEY=your-api-key-here
+ # API_BASE_URL=http://localhost:11434
+
+ # Database Configuration (if needed)
+ # DB_HOST=db
+ # DB_PORT=5432
+ # DB_NAME=llm_demo
+ # DB_USER=postgres
+ # DB_PASSWORD=your-db-password
+
+ # CORS Configuration (if needed)
+ # CORS_ORIGINS=http://localhost:3000,http://localhost:8501
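These variables are injected by `docker-compose.yml` via `env_file: .env`; the application itself reads them through `os.getenv`, as `app/main.py` does. A minimal sketch of reading the same settings in Python (no extra dependency assumed; the derived `OLLAMA_URL` variable is illustrative):

```python
# Sketch: read the .env settings from the process environment,
# mirroring the os.getenv pattern used in app/main.py.
import os

OLLAMA_HOST = os.getenv("OLLAMA_HOST", "0.0.0.0")
OLLAMA_PORT = int(os.getenv("OLLAMA_PORT", "11434"))
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "mistral:7b-instruct")
MODEL_TEMPERATURE = float(os.getenv("MODEL_TEMPERATURE", "0.7"))
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "2000"))

# Illustrative: build the Ollama base URL from host and port.
OLLAMA_URL = f"http://{OLLAMA_HOST}:{OLLAMA_PORT}"
print(f"Using {DEFAULT_MODEL} at {OLLAMA_URL}")
```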
.gitignore ADDED
@@ -0,0 +1,197 @@
1
+ .idea
2
+ .env
3
+ venv
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # UV
101
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ #uv.lock
105
+
106
+ # poetry
107
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
108
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
109
+ # commonly ignored for libraries.
110
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
111
+ #poetry.lock
112
+
113
+ # pdm
114
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
115
+ #pdm.lock
116
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
117
+ # in version control.
118
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
119
+ .pdm.toml
120
+ .pdm-python
121
+ .pdm-build/
122
+
123
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
124
+ __pypackages__/
125
+
126
+ # Celery stuff
127
+ celerybeat-schedule
128
+ celerybeat.pid
129
+
130
+ # SageMath parsed files
131
+ *.sage.py
132
+
133
+ # Environments
134
+ .env
135
+ .venv
136
+ env/
137
+ venv/
138
+ ENV/
139
+ env.bak/
140
+ venv.bak/
141
+
142
+ # Spyder project settings
143
+ .spyderproject
144
+ .spyproject
145
+
146
+ # Rope project settings
147
+ .ropeproject
148
+
149
+ # mkdocs documentation
150
+ /site
151
+
152
+ # mypy
153
+ .mypy_cache/
154
+ .dmypy.json
155
+ dmypy.json
156
+
157
+ # Pyre type checker
158
+ .pyre/
159
+
160
+ # pytype static type analyzer
161
+ .pytype/
162
+
163
+ # Cython debug symbols
164
+ cython_debug/
165
+
166
+ # PyCharm
167
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
168
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
169
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
170
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
171
+ #.idea/
172
+
173
+ # Abstra
174
+ # Abstra is an AI-powered process automation framework.
175
+ # Ignore directories containing user credentials, local state, and settings.
176
+ # Learn more at https://abstra.io/docs
177
+ .abstra/
178
+
179
+ # Visual Studio Code
180
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
181
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
182
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
183
+ # you could uncomment the following to ignore the entire vscode folder
184
+ # .vscode/
185
+
186
+ # Ruff stuff:
187
+ .ruff_cache/
188
+
189
+ # PyPI configuration file
190
+ .pypirc
191
+
192
+ # Cursor
193
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
194
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
195
+ # refer to https://docs.cursor.com/context/ignore-files
196
+ .cursorignore
197
+ .cursorindexingignore
Dockerfile ADDED
@@ -0,0 +1,20 @@
+ FROM python:3.11-slim
+
+ # Minimal system dependencies
+ RUN apt-get update && apt-get install -y \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Python dependencies (only 3 packages!)
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Application code
+ WORKDIR /app
+ COPY app/ .
+
+ # Streamlit port
+ EXPOSE 8501
+
+ # Startup command
+ CMD ["streamlit", "run", "main.py", "--server.address", "0.0.0.0", "--server.port", "8501"]
LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
Makefile ADDED
@@ -0,0 +1,119 @@
+ .PHONY: help install build up down stop restart logs logs-ollama logs-ui clean test lint format check-env shell-ollama shell-ui open open-ui open-ollama
+
+ # Default target
+ help:
+ @echo "\nLLM Demo - Available commands:\n"
+ @echo " make install Install Python dependencies"
+ @echo " make build Build Docker containers"
+ @echo " make up Start all services in detached mode"
+ @echo " make down Stop and remove all containers, networks, and volumes"
+ @echo " make stop Stop everything and prune images, volumes, and networks"
+ @echo " make restart Restart all services"
+ @echo " make logs Follow container logs"
+ @echo " make logs-ollama Follow Ollama container logs"
+ @echo " make logs-ui Follow Streamlit UI logs"
+ @echo " make clean Alias for 'make stop'"
+ @echo " make test Run tests"
+ @echo " make lint Run linter"
+ @echo " make format Format code"
+ @echo " make shell-ollama Open shell in Ollama container"
+ @echo " make shell-ui Open shell in Streamlit UI container"
+ @echo " make open Open all services in browser"
+ @echo " make open-ui Open Streamlit UI in browser"
+ @echo " make open-ollama Open Ollama API in browser"
+
+ # Check if .env file exists
+ check-env:
+ @if [ ! -f .env ]; then \
+ echo "Error: .env file not found. Please create one from .env.example"; \
+ exit 1; \
+ fi
+
+ # Install Python dependencies
+ install:
+ @echo "Installing Python dependencies..."
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+
+ # Build Docker containers
+ build: check-env
+ @echo "Building Docker containers..."
+ docker-compose build
+
+ # Start all services in detached mode
+ up: check-env
+ @echo "Starting all services..."
+ docker-compose up -d
+
+ # Stop and remove all containers, networks, and volumes
+ down:
+ @echo "Stopping and removing all containers..."
+ docker-compose down -v
+
+ # Stop and remove all containers, networks, and images, then prune unused Docker resources
+ stop:
+ @echo "Stopping and removing all containers, networks, and images..."
+ docker-compose down --rmi all --volumes --remove-orphans
+ @echo "Removing unused Docker resources..."
+ docker system prune -a -f --volumes
+ @echo "Removing all unused Docker networks..."
+ docker network prune -f
+ @echo "Removing all unused Docker volumes..."
+ docker volume prune -f
+
+ # Restart all services
+ restart: stop up
+
+ # Follow container logs
+ logs:
+ docker-compose logs -f
+
+ # Follow Ollama container logs
+ logs-ollama:
+ docker-compose logs -f ollama
+
+ # Follow Streamlit UI logs
+ logs-ui:
+ docker-compose logs -f streamlit-ui
+
+ # Alias for stop (for backward compatibility)
+ clean: stop
+
+ # Run tests
+ test:
+ @echo "Running tests..."
+ # Add your test command here
+ # Example: python -m pytest tests/
+
+ # Lint code
+ lint:
+ @echo "Running linter..."
+ # Add your lint command here
+ # Example: pylint app/
+
+ # Format code
+ format:
+ @echo "Formatting code..."
+ # Add your format command here
+ # Example: black app/
+
+ # Open shell in Ollama container
+ shell-ollama:
+ docker-compose exec ollama /bin/sh
+
+ # Open shell in Streamlit UI container
+ shell-ui:
+ docker-compose exec streamlit-ui /bin/sh
+
+ # Open all services in browser
+ open: open-ui open-ollama
+
+ # Open Streamlit UI in browser
+ open-ui:
+ @echo "Opening Streamlit UI..."
+ @xdg-open http://localhost:8501 2>/dev/null || open http://localhost:8501 2>/dev/null || start http://localhost:8501 2>/dev/null || echo "Could not open the browser. Please open http://localhost:8501 manually"
+
+ # Open Ollama API in browser
+ open-ollama:
+ @echo "Opening Ollama API..."
+ @xdg-open http://localhost:11436 2>/dev/null || open http://localhost:11436 2>/dev/null || start http://localhost:11436 2>/dev/null || echo "Could not open the browser. Please open http://localhost:11436 manually"
Modelfile ADDED
@@ -0,0 +1,21 @@
+ FROM ./my_custom_model.gguf
+
+ # Model parameters
+ PARAMETER temperature 0.7
+ PARAMETER top_p 0.9
+ PARAMETER top_k 40
+ PARAMETER num_ctx 2048
+
+ # System prompt (triple quotes for a multi-line value)
+ SYSTEM """Jesteś pomocnym asystentem AI stworzonym specjalnie dla polskich użytkowników.
+ Odpowiadasz w języku polskim, jesteś precyzyjny i pomocny.
+ Specjalizujesz się w programowaniu, technologii i sztucznej inteligencji."""
+
+ # Chat template for Mistral Instruct
+ TEMPLATE "<s>[INST] {{ if .System }}{{ .System }}{{ end }}{{ .Prompt }} [/INST] {{ .Response }}</s>"
+
+ # Generation limits and stop tokens
+ PARAMETER num_predict 256
+ PARAMETER stop "<s>"
+ PARAMETER stop "[INST]"
+ PARAMETER stop "[/INST]"
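For readers unfamiliar with Go template syntax, the TEMPLATE line above serializes a single exchange roughly as shown in this Python sketch (illustrative only; Ollama performs the real rendering):

```python
# Rough Python mirror of the Modelfile TEMPLATE above.
def render(system: str, prompt: str, response: str = "") -> str:
    # The system prompt (if any) and the user prompt share one [INST] ... [/INST] span.
    system_part = system if system else ""
    return f"<s>[INST] {system_part}{prompt} [/INST] {response}</s>"

print(render("Jesteś pomocnym asystentem AI. ", "Kim jesteś?"))
```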
Modelfile.template ADDED
@@ -0,0 +1,41 @@
+ # Modelfile for custom fine-tuned Mistral model
+ # Generated by convert_to_gguf.sh
+
+ FROM ./my_custom_model.gguf
+
+ # Model parameters optimized for RTX 3050
+ PARAMETER temperature 0.7
+ PARAMETER top_p 0.9
+ PARAMETER top_k 40
+ PARAMETER num_ctx 2048
+ PARAMETER num_predict 512
+ PARAMETER repeat_penalty 1.1
+ PARAMETER repeat_last_n 64
+
+ # Stop tokens for Mistral format
+ PARAMETER stop "<s>"
+ PARAMETER stop "[INST]"
+ PARAMETER stop "[/INST]"
+ PARAMETER stop "</s>"
+
+ # System prompt - customize this for your use case
+ SYSTEM """Jesteś pomocnym asystentem AI stworzonym przez fine-tuning modelu Mistral 7B.
+
+ Twoje specjalizacje:
+ - Programowanie w Pythonie
+ - Machine Learning i AI
+ - Docker i DevOps
+ - Wyjaśnianie technicznych konceptów
+
+ Odpowiadasz w języku polskim, jesteś precyzyjny i podajesz praktyczne przykłady.
+ Gdy nie znasz odpowiedzi, uczciwie to przyznajesz.
+ """
+
+ # Chat template for Mistral Instruct format
+ TEMPLATE """<s>{{- if .System }}[INST] {{ .System }}
+
+ {{ .Prompt }} [/INST]{{ else }}[INST] {{ .Prompt }} [/INST]{{ end }} {{ .Response }}</s>"""
+
+ # Runtime settings
+ PARAMETER num_thread 4
+ PARAMETER num_gpu_layers 20
README.md CHANGED
@@ -1,3 +1,388 @@
- ---
- license: apache-2.0
- ---
+ # 🚀 Minimal LLM + Custom Model - Complete Guide
+
+ ## 🎯 **PART 1: Up and running in 2 minutes**
+
+ ### Quick start (the minimal solution)
+ ```bash
+ # 1. Clone the files
+ git clone <your-repo>
+ cd minimal-llm
+
+ # 2. Start everything with one command
+ chmod +x quick-start.sh
+ ./quick-start.sh
+
+ # 3. Open your browser
+ # http://localhost:8501 - Streamlit UI
+ # http://localhost:11434 - Ollama API
+ ```
+
+ ### What happens under the hood?
+ - **Ollama** - pulls and serves Mistral 7B
+ - **Streamlit** - a simple chat interface
+ - **Docker** - everything runs in containers
+ - **Minimal dependencies** - only 3 Python packages!
+
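Once the stack is up you can also talk to the Ollama API directly, without the UI. A minimal sketch using the same `ollama` Python client that `app/main.py` uses (it assumes the default port 11434 and that the `mistral:7b-instruct` model has finished downloading):

```python
# Quick smoke test against the running Ollama container.
import ollama

client = ollama.Client(host="http://localhost:11434")
reply = client.chat(
    model="mistral:7b-instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(reply["message"]["content"])
```
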
+ ## 📁 **Project structure (minimal)**
+ ```
+ minimal-llm/
+ ├── docker-compose.yml   # 1 file - the whole infrastructure
+ ├── Dockerfile           # minimal image
+ ├── requirements.txt     # 3 packages
+ ├── quick-start.sh       # 1 command = full setup
+ └── app/
+     └── main.py          # 50 lines = a complete chat
+ ```
+
+ ## 🎯 **PART 2: Create your own LLM**
+
+ ### Step 1: Prepare the environment
+ ```bash
+ # Install the fine-tuning dependencies
+ pip install -r model_requirements.txt
+
+ # Log in to Hugging Face (needed for publishing)
+ huggingface-cli login
+ ```
+
+ ### Step 2: Prepare the data
+ ```bash
+ python create_custom_model.py
+ # Choose option 1: Create a sample dataset
+ ```
+
+ Example training data:
+ ```json
+ [
+   {
+     "instruction": "Jak nazywa się stolica Polski?",
+     "input": "",
+     "output": "Stolica Polski to Warszawa."
+   },
+   {
+     "instruction": "Wyjaśnij czym jest sztuczna inteligencja",
+     "input": "",
+     "output": "Sztuczna inteligencja (AI) to dziedzina informatyki..."
+   }
+ ]
+ ```
+
+ ### Step 3: Fine-tune the model
+ ```bash
+ # Run fine-tuning (requires a GPU)
+ python create_custom_model.py
+ # Choose option 2: Fine-tune model
+
+ # Or run the full pipeline
+ python create_custom_model.py
+ # Choose option 6: Full pipeline
+ ```
+
+ **Optimizations for an RTX 3050:**
+ - 4-bit quantization
+ - LoRA (Low-Rank Adaptation)
+ - Batch size = 1
+ - Gradient accumulation = 4
+ - Mixed precision (FP16)
+
+ ### Step 4: Convert to GGUF
+ ```bash
+ # Automatically generated script
+ ./convert_to_gguf.sh
+ ```
+
+ ### Step 5: Create the model in Ollama
+ ```bash
+ # Create the Modelfile
+ python create_custom_model.py  # choose option 4
+
+ # Create the model in Ollama
+ ollama create wronai -f Modelfile
+
+ # Run the model
+ ollama run wronai
+ ```
+
+ ### Running the script
+ The `create_custom_model.py` script provides an interactive menu with the following options:
+
+ ```bash
+ python create_custom_model.py
+ ```
+
+ Available options:
+ 1. Create a sample dataset
+ 2. Fine-tune the model
+ 3. Convert the model to GGUF format
+ 4. Create a Modelfile for Ollama
+ 5. Publish the model to Hugging Face
+ 6. Run the full pipeline (1-5)
+
+ ### Prerequisites
+ - Python 3.8+
+ - PyTorch with CUDA support (recommended)
+ - The libraries listed in `model_requirements.txt`
+ - A [Hugging Face](https://huggingface.co/) account (for publishing the model)
+
+ ### Troubleshooting
+
+ #### Syntax errors in the script
+ If you hit a syntax error, make sure that:
+ 1. You are using Python 3.8 or newer
+ 2. All dependencies are installed
+ 3. The file was not corrupted during download
+
+ #### Dependency problems
+ ```bash
+ # Create and activate a virtual environment
+ python -m venv .venv
+ source .venv/bin/activate  # Linux/Mac
+ .venv\Scripts\activate     # Windows
+
+ # Install the dependencies
+ pip install -r model_requirements.txt
+ ```
+
+ #### Missing permissions
+ If you run into permission problems, try:
+ ```bash
+ # Make the shell scripts executable
+ chmod +x *.sh
+
+ # Run with administrator privileges (if needed)
+ sudo python create_custom_model.py
+ ```
+
+ ### Contributing
+ Issues and pull requests are welcome.
+
+ Quick test of the model:
+ ```bash
+ # Test the model
+ ollama run wronai "Cześć! Kim jesteś?"
+ ```
+
+ ### Step 6: Publish the model
+
+ #### **Option A: Ollama Registry**
+ ```bash
+ # Push to the Ollama Library
+ ollama push wronai
+
+ # Now anyone can use it:
+ ollama pull your-username/wronai
+ ```
+
+ #### **Option B: Hugging Face Hub**
+ ```bash
+ # Publish to HF
+ python publish_to_hf.py
+
+ # The model is then available at:
+ # https://huggingface.co/your-username/my-custom-mistral-7b
+ ```
+
+ #### **Option C: Docker Registry**
+ ```bash
+ # Package it as a Docker image
+ docker build -t my-custom-llm .
+ docker tag my-custom-llm your-registry/my-custom-llm
+ docker push your-registry/my-custom-llm
+ ```
+
+ ## 🎯 **PART 3: Ready-made alternatives (zero code)**
+
+ ### **1. Simplest - Ollama**
+ ```bash
+ # Install
+ curl -fsSL https://ollama.ai/install.sh | sh
+
+ # Run a model
+ ollama run mistral:7b-instruct
+
+ # The API is automatically available on localhost:11434
+ ```
+
+ ### **2. Hugging Face Inference API**
+ ```python
+ import requests
+
+ headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}
+ response = requests.post(
+     "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1",
+     headers=headers,
+     json={"inputs": "Hello!"}
+ )
+ ```
+
+ ### **3. Groq (ultra fast)**
+ ```python
+ from openai import OpenAI
+
+ client = OpenAI(
+     api_key="YOUR_GROQ_KEY",
+     base_url="https://api.groq.com/openai/v1"
+ )
+
+ response = client.chat.completions.create(
+     model="mistral-7b-instruct",
+     messages=[{"role": "user", "content": "Hello!"}]
+ )
+ ```
+
+ ### **4. Together.ai**
+ ```python
+ from openai import OpenAI
+
+ client = OpenAI(
+     api_key="YOUR_TOGETHER_KEY",
+     base_url="https://api.together.xyz/v1"
+ )
+
+ # Compatible with the OpenAI API
+ ```
+
+ ### **5. Modal.com (serverless GPU)**
+ ```python
+ import modal
+
+ stub = modal.Stub("llm-api")
+
+ @stub.function(gpu="T4")
+ def generate(prompt: str):
+     # Your model code
+     return model.generate(prompt)
+
+ # Deploy with one command
+ # modal deploy
+ ```
+
+ ## 🎯 **PART 4: Frontend options**
+
+ ### **1. Streamlit (Python)**
+ ```python
+ import streamlit as st
+
+ st.title("My LLM Chat")
+ prompt = st.text_input("Message:")
+ if st.button("Send"):
+     response = generate(prompt)
+     st.write(response)
+ ```
+
+ ### **2. Gradio (Python)**
+ ```python
+ import gradio as gr
+
+ def chat(message, history):
+     response = generate(message)
+     history.append([message, response])
+     return "", history
+
+ gr.ChatInterface(chat).launch()
+ ```
+
+ ### **3. Next.js + Vercel AI SDK**
+ ```tsx
+ import { useChat } from 'ai/react'
+
+ export default function Chat() {
+   const { messages, input, handleInputChange, handleSubmit } = useChat()
+
+   return (
+     <div>
+       {messages.map(m => <div key={m.id}>{m.content}</div>)}
+       <form onSubmit={handleSubmit}>
+         <input value={input} onChange={handleInputChange} />
+       </form>
+     </div>
+   )
+ }
+ ```
+
+ ## 🎯 **PART 5: Comparing the options**
+
+ | Solution | Setup Time | Code | Hosting | GPU |
+ |-------------|------------|-----|---------|-----|
+ | **Ollama + Streamlit** | 2 min | 50 lines | Local/Docker | Optional |
+ | **Hugging Face API** | 30 sec | 5 lines | Cloud | No |
+ | **Groq API** | 1 min | 5 lines | Cloud | No |
+ | **Modal.com** | 5 min | 20 lines | Serverless | Auto |
+ | **Custom Fine-tuning** | 2 hours | 200 lines | Self-hosted | Required |
+
+ ## 🛠️ **Debugging & Tips**
+
+ ### Common problems
+ ```bash
+ # The model does not load
+ docker logs ollama-engine
+
+ # No GPU detected
+ docker run --rm --gpus all nvidia/cuda:11.8-base nvidia-smi
+
+ # Port already in use
+ sudo netstat -tlnp | grep 11434
+
+ # Restart everything
+ docker compose down && docker compose up -d
+ ```
+
+ ### RTX 3050 optimizations
+ ```python
+ # During fine-tuning
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     load_in_4bit=True,          # 4-bit quantization
+     torch_dtype=torch.float16   # Half precision
+ )
+
+ # Training args
+ TrainingArguments(
+     per_device_train_batch_size=1,   # Small batch
+     gradient_accumulation_steps=4,   # Gradient accumulation
+     fp16=True                        # Mixed precision
+ )
+ ```
+
+ ### Resource monitoring
+ ```bash
+ # GPU monitoring
+ watch -n 1 nvidia-smi
+
+ # Container resources
+ docker stats
+
+ # Model memory usage
+ docker exec -it ollama-engine ollama ps
+ ```
+
+ ## 🎯 **Next steps**
+
+ ### For learning:
+ 1. **Experiment with different models** - Llama, CodeLlama, Phi-3
+ 2. **Try different fine-tuning techniques** - LoRA, QLoRA, full fine-tuning
+ 3. **Add RAG** - Retrieval Augmented Generation (see the sketch after this list)
+ 4. **Build a multi-agent system**
+
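A minimal sketch of the RAG idea on top of this stack: retrieve a few relevant snippets, prepend them to the prompt, and let the local model answer. The retrieval step here is a naive keyword match over an in-memory list; a real setup would use embeddings and a vector store, and all names below are illustrative.

```python
# Naive RAG sketch: keyword retrieval + prompt stuffing with the local Ollama model.
import ollama

client = ollama.Client(host="http://localhost:11434")

# Stand-in "knowledge base"; in practice: chunked documents + embeddings.
documents = [
    "Ollama serves local LLMs over an HTTP API on port 11434.",
    "Streamlit renders the chat UI on port 8501.",
    "The stack is started with docker compose up -d.",
]

def retrieve(question, k=2):
    # Toy scoring: count words shared between the question and each document.
    words = set(question.lower().split())
    scored = sorted(documents, key=lambda d: -len(words & set(d.lower().split())))
    return scored[:k]

def answer(question):
    context = "\n".join(retrieve(question))
    prompt = f"Answer using only this context:\n{context}\n\nQuestion: {question}"
    reply = client.chat(model="mistral:7b-instruct",
                        messages=[{"role": "user", "content": prompt}])
    return reply["message"]["content"]

print(answer("Which port does the chat UI use?"))
```
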
+ ### For production:
+ 1. **Move to a managed service** - Groq, Together.ai
+ 2. **Set up monitoring** - LangSmith, Weights & Biases
+ 3. **Add a cache** - Redis for responses
+ 4. **Implement rate limiting**
+
+ ### For business:
+ 1. **Fine-tune on your own data**
+ 2. **Set up A/B testing** of different models
+ 3. **Add a feedback loop** from users
+ 4. **Monetize the API**
+
+ ## 🎉 **Summary**
+
+ **Pick an option based on your needs:**
+
+ - **Demo/learning**: Ollama + Streamlit (this solution)
+ - **Prototype**: Hugging Face API + Gradio
+ - **MVP**: Groq API + Next.js
+ - **Production**: Modal/RunPod + custom frontend
+ - **Enterprise**: fine-tuned model + your own infrastructure
+
+ **The minimal solution = 5 files, 50 lines of code, 2 minutes of setup!**
+
app/main.py ADDED
@@ -0,0 +1,124 @@
+ #!/usr/bin/env python3
+ """
+ A minimal LLM app in ~50 lines.
+ Streamlit + Ollama = zero configuration.
+ """
+
+ import streamlit as st
+ import ollama
+ import os
+ from typing import Generator
+
+ # Configuration
+ OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
+ MODEL_NAME = "mistral:7b-instruct"
+
+ # Set up the Ollama client
+ client = ollama.Client(host=OLLAMA_URL)
+
+ def stream_response(prompt: str) -> Generator[str, None, None]:
+     """Generator yielding the streamed model response."""
+     try:
+         stream = client.chat(
+             model=MODEL_NAME,
+             messages=[{"role": "user", "content": prompt}],
+             stream=True
+         )
+
+         for chunk in stream:
+             if chunk['message']['content']:
+                 yield chunk['message']['content']
+     except Exception as e:
+         yield f"Error: {str(e)}"
+
+ def main():
+     # UI setup
+     st.set_page_config(
+         page_title="🤖 Minimal LLM Chat",
+         page_icon="🤖",
+         layout="wide"
+     )
+
+     st.title("🤖 Minimal LLM Chat")
+     st.markdown("*Powered by Ollama + Mistral 7B*")
+
+     # Sidebar with settings
+     with st.sidebar:
+         st.header("⚙️ Settings")
+
+         # Model info
+         try:
+             models = client.list()
+             available_models = [m['name'] for m in models['models']]
+             st.success(f"✅ Connected to Ollama")
+             st.info(f"Available models: {len(available_models)}")
+         except Exception:
+             st.error("❌ Cannot connect to Ollama")
+             st.stop()
+
+         # Parameters (currently display-only; not wired into stream_response)
+         temperature = st.slider("Temperature", 0.0, 2.0, 0.7, 0.1)
+         max_tokens = st.slider("Max tokens", 50, 1000, 500, 50)
+
+         # System prompt
+         system_prompt = st.text_area(
+             "System prompt:",
+             "You are a helpful AI assistant. Answer concisely and accurately.",
+             height=100
+         )
+
+     # Chat interface
+     if "messages" not in st.session_state:
+         st.session_state.messages = []
+
+     # Display chat history
+     for message in st.session_state.messages:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     # Chat input
+     if prompt := st.chat_input("Type your message here..."):
+         # Add user message
+         st.session_state.messages.append({"role": "user", "content": prompt})
+         with st.chat_message("user"):
+             st.markdown(prompt)
+
+         # Generate response
+         with st.chat_message("assistant"):
+             response_placeholder = st.empty()
+             full_response = ""
+
+             # Streaming response
+             for chunk in stream_response(prompt):
+                 full_response += chunk
+                 response_placeholder.markdown(full_response + "▌")
+
+             response_placeholder.markdown(full_response)
+
+         # Add assistant message
+         st.session_state.messages.append({"role": "assistant", "content": full_response})
+
+     # Quick actions
+     col1, col2, col3 = st.columns(3)
+
+     with col1:
+         if st.button("🗑️ Clear Chat"):
+             st.session_state.messages = []
+             st.rerun()
+
+     with col2:
+         if st.button("💡 Example Question"):
+             example = "Explain quantum computing in simple terms"
+             st.session_state.messages.append({"role": "user", "content": example})
+             st.rerun()
+
+     with col3:
+         if st.button("📊 Model Info"):
+             try:
+                 info = client.show(MODEL_NAME)
+                 st.json(info)
+             except Exception:
+                 st.error("Cannot get model info")
+
+ if __name__ == "__main__":
+     main()
convert_to_gguf.sh ADDED
@@ -0,0 +1,164 @@
1
+ #!/bin/bash
2
+
3
+ # 🔄 Convert fine-tuned model to GGUF format for Ollama
4
+ # This script converts your custom fine-tuned model to GGUF format
5
+
6
+ set -e
7
+
8
+ # Colors for output
9
+ RED='\033[0;31m'
10
+ GREEN='\033[0;32m'
11
+ YELLOW='\033[1;33m'
12
+ BLUE='\033[0;34m'
13
+ NC='\033[0m' # No Color
14
+
15
+ echo -e "${BLUE}🔄 Converting Model to GGUF Format${NC}"
16
+ echo "====================================="
17
+
18
+ # Configuration
19
+ MODEL_DIR="./fine_tuned_model"
20
+ OUTPUT_FILE="my_custom_model.gguf"
21
+ LLAMA_CPP_DIR="./llama.cpp"
22
+
23
+ # Check if fine-tuned model exists
24
+ if [ ! -d "$MODEL_DIR" ]; then
25
+ echo -e "${RED}❌ Fine-tuned model not found at: $MODEL_DIR${NC}"
26
+ echo "Run fine-tuning first: python create_custom_model.py (option 2)"
27
+ exit 1
28
+ fi
29
+
30
+ echo -e "${GREEN}✅ Found fine-tuned model at: $MODEL_DIR${NC}"
31
+
32
+ # Check if llama.cpp exists, if not clone it
33
+ if [ ! -d "$LLAMA_CPP_DIR" ]; then
34
+ echo -e "${YELLOW}📥 Cloning llama.cpp...${NC}"
35
+ git clone https://github.com/ggerganov/llama.cpp.git
36
+
37
+ echo -e "${YELLOW}🔨 Building llama.cpp...${NC}"
38
+ cd llama.cpp
39
+
40
+ # Build with CUDA support if available
41
+ if command -v nvcc &> /dev/null; then
42
+ echo -e "${GREEN}🚀 NVIDIA CUDA detected, building with GPU support${NC}"
43
+ make LLAMA_CUBLAS=1 -j$(nproc)
44
+ else
45
+ echo -e "${YELLOW}⚠️ No CUDA detected, building CPU-only version${NC}"
46
+ make -j$(nproc)
47
+ fi
48
+
49
+ cd ..
50
+ else
51
+ echo -e "${GREEN}✅ llama.cpp already exists${NC}"
52
+ fi
53
+
54
+ # Check required Python dependencies
55
+ echo -e "${BLUE}📦 Checking Python dependencies...${NC}"
56
+ python3 -c "import torch, transformers, sentencepiece" 2>/dev/null || {
57
+ echo -e "${YELLOW}⚠️ Installing missing dependencies...${NC}"
58
+ pip install torch transformers sentencepiece protobuf
59
+ }
60
+
61
+ # Convert model to GGUF
62
+ echo -e "${BLUE}🔄 Converting to GGUF format...${NC}"
63
+ echo "This may take several minutes..."
64
+
65
+ # Method 1: Direct conversion (recommended)
66
+ if [ -f "$LLAMA_CPP_DIR/convert.py" ]; then
67
+ echo -e "${GREEN}Using convert.py${NC}"
68
+ python3 "$LLAMA_CPP_DIR/convert.py" \
69
+ "$MODEL_DIR" \
70
+ --outtype f16 \
71
+ --outfile "$OUTPUT_FILE"
72
+ else
73
+ # Method 2: Convert via HF format (fallback)
74
+ echo -e "${YELLOW}Using alternative conversion method${NC}"
75
+ python3 -c "
76
+ import torch
77
+ from transformers import AutoModelForCausalLM, AutoTokenizer
78
+ import sys
79
+ import os
80
+
81
+ print('Loading model...')
82
+ model = AutoModelForCausalLM.from_pretrained('$MODEL_DIR', torch_dtype=torch.float16)
83
+ tokenizer = AutoTokenizer.from_pretrained('$MODEL_DIR')
84
+
85
+ print('Saving in HF format...')
86
+ model.save_pretrained('./temp_hf_model', safe_serialization=True)
87
+ tokenizer.save_pretrained('./temp_hf_model')
88
+ print('Conversion to HF format complete')
89
+ "
90
+
91
+ # Then convert HF to GGUF
92
+ if [ -d "./temp_hf_model" ]; then
93
+ python3 "$LLAMA_CPP_DIR/convert.py" \
94
+ "./temp_hf_model" \
95
+ --outtype f16 \
96
+ --outfile "$OUTPUT_FILE"
97
+ rm -rf ./temp_hf_model
98
+ fi
99
+ fi
100
+
101
+ # Verify conversion
102
+ if [ -f "$OUTPUT_FILE" ]; then
103
+ FILE_SIZE=$(du -h "$OUTPUT_FILE" | cut -f1)
104
+ echo
105
+ echo -e "${GREEN}🎉 Conversion successful!${NC}"
106
+ echo -e "${BLUE}📄 Output file: $OUTPUT_FILE${NC}"
107
+ echo -e "${BLUE}📊 File size: $FILE_SIZE${NC}"
108
+
109
+ # Optional: Quantize to smaller sizes
110
+ echo
111
+ echo -e "${YELLOW}💡 Optional: Create quantized versions?${NC}"
112
+ read -p "Create Q4_K_M quantized version? (y/N): " -n 1 -r
113
+ echo
114
+ if [[ $REPLY =~ ^[Yy]$ ]]; then
115
+ echo -e "${BLUE}🔄 Creating Q4_K_M quantized version...${NC}"
116
+ "$LLAMA_CPP_DIR/quantize" "$OUTPUT_FILE" "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" Q4_K_M
117
+
118
+ if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then
119
+ QUANT_SIZE=$(du -h "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" | cut -f1)
120
+ echo -e "${GREEN}✅ Quantized version created: ${OUTPUT_FILE%.gguf}_q4_k_m.gguf ($QUANT_SIZE)${NC}"
121
+ fi
122
+ fi
123
+
124
+ # Test the converted model
125
+ echo
126
+ echo -e "${YELLOW}🧪 Test the converted model?${NC}"
127
+ read -p "Run a quick test? (y/N): " -n 1 -r
128
+ echo
129
+ if [[ $REPLY =~ ^[Yy]$ ]]; then
130
+ echo -e "${BLUE}🧪 Testing model...${NC}"
131
+ echo "Prompt: 'Hello, how are you?'"
132
+ echo "Response:"
133
+ "$LLAMA_CPP_DIR/main" -m "$OUTPUT_FILE" -p "Hello, how are you?" -n 50 --temp 0.7
134
+ fi
135
+
136
+ else
137
+ echo -e "${RED}❌ Conversion failed!${NC}"
138
+ echo "Check the error messages above."
139
+ exit 1
140
+ fi
141
+
142
+ # Instructions for next steps
143
+ echo
144
+ echo -e "${GREEN}🎯 Next Steps:${NC}"
145
+ echo "1. Create Ollama Modelfile:"
146
+ echo " python create_custom_model.py # option 4"
147
+ echo
148
+ echo "2. Import to Ollama:"
149
+ echo " ollama create my-custom-model -f Modelfile"
150
+ echo
151
+ echo "3. Test in Ollama:"
152
+ echo " ollama run my-custom-model \"Hello!\""
153
+ echo
154
+ echo "4. Push to Ollama Library:"
155
+ echo " ollama push my-custom-model"
156
+ echo
157
+ echo -e "${BLUE}📚 Files created:${NC}"
158
+ echo " • $OUTPUT_FILE (F16 version)"
159
+ if [ -f "${OUTPUT_FILE%.gguf}_q4_k_m.gguf" ]; then
160
+ echo " • ${OUTPUT_FILE%.gguf}_q4_k_m.gguf (Quantized version)"
161
+ fi
162
+
163
+ echo
164
+ echo -e "${GREEN}🎉 GGUF conversion completed successfully!${NC}"
create_custom_model.py ADDED
@@ -0,0 +1,373 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ 🎯 Tworzenie własnego modelu LLM - od fine-tuningu do publikacji
4
+ Przykład z Mistral 7B + własne dane
5
+ """
6
+
7
+ import os
8
+ import json
9
+ from pathlib import Path
10
+ from datasets import Dataset
11
+ from transformers import (
12
+ AutoModelForCausalLM,
13
+ AutoTokenizer,
14
+ TrainingArguments,
15
+ Trainer
16
+ )
17
+ from peft import LoraConfig, get_peft_model, TaskType
18
+ import torch
19
+
20
+ # === 1. PRZYGOTOWANIE DANYCH ===
21
+
22
+ def create_sample_dataset():
23
+ """Tworzy przykładowy dataset do fine-tuningu"""
24
+
25
+ # Przykładowe dane - zamień na swoje!
26
+ sample_data = [
27
+ {
28
+ "instruction": "Jak nazywa się stolica Polski?",
29
+ "input": "",
30
+ "output": "Stolica Polski to Warszawa."
31
+ },
32
+ {
33
+ "instruction": "Wyjaśnij czym jest sztuczna inteligencja",
34
+ "input": "",
35
+ "output": "Sztuczna inteligencja (AI) to dziedzina informatyki zajmująca się tworzeniem systemów zdolnych do wykonywania zadań wymagających inteligencji."
36
+ },
37
+ {
38
+ "instruction": "Napisz krótką funkcję w Pythonie",
39
+ "input": "funkcja do obliczania silni",
40
+ "output": "```python\ndef factorial(n):\n if n <= 1:\n return 1\n return n * factorial(n-1)\n```"
41
+ },
42
+ {
43
+ "instruction": "Jak zoptymalizować kod Python?",
44
+ "input": "",
45
+ "output": "Główne sposoby optymalizacji kodu Python:\n1. Używaj wbudowanych funkcji\n2. Unikaj pętli, preferuj list comprehensions\n3. Używaj numpy dla operacji numerycznych\n4. Profiluj kod przed optymalizacją"
46
+ },
47
+ {
48
+ "instruction": "Co to jest Docker?",
49
+ "input": "",
50
+ "output": "Docker to platforma konteneryzacji umożliwiająca pakowanie aplikacji wraz z zależnościami w lekkie, przenośne kontenery."
51
+ }
52
+ ]
53
+
54
+ # Zapisz dataset
55
+ os.makedirs("data", exist_ok=True)
56
+ with open("data/training_data.json", "w", encoding="utf-8") as f:
57
+ json.dump(sample_data, f, indent=2, ensure_ascii=False)
58
+
59
+ print("✅ Sample dataset created in data/training_data.json")
60
+ return sample_data
61
+
62
+ def format_training_data(examples):
63
+ """Formatuje dane dla Mistral Instruct"""
64
+ formatted_texts = []
65
+
66
+ for example in examples:
67
+ if example.get("input"):
68
+ prompt = f"<s>[INST] {example['instruction']}\n{example['input']} [/INST] {example['output']}</s>"
69
+ else:
70
+ prompt = f"<s>[INST] {example['instruction']} [/INST] {example['output']}</s>"
71
+ formatted_texts.append(prompt)
72
+
73
+ return {"text": formatted_texts}
74
+
75
+ # === 2. FINE-TUNING Z LORA ===
76
+
77
+ def setup_model_and_tokenizer(model_name="mistralai/Mistral-7B-Instruct-v0.1"):
78
+ """Ładuje model i tokenizer"""
79
+ print(f"📥 Loading model: {model_name}")
80
+
81
+ # Tokenizer
82
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
83
+ tokenizer.pad_token = tokenizer.eos_token
84
+ tokenizer.padding_side = "right"
85
+
86
+ # Model z quantization dla RTX 3050
87
+ model = AutoModelForCausalLM.from_pretrained(
88
+ model_name,
89
+ torch_dtype=torch.float16,
90
+ device_map="auto",
91
+ load_in_4bit=True, # 4-bit quantization
92
+ trust_remote_code=True
93
+ )
94
+
95
+ return model, tokenizer
96
+
97
+ def setup_lora_config():
98
+ """Konfiguracja LoRA dla efficient fine-tuning"""
99
+ return LoraConfig(
100
+ task_type=TaskType.CAUSAL_LM,
101
+ inference_mode=False,
102
+ r=16, # LoRA rank
103
+ lora_alpha=32,
104
+ lora_dropout=0.1,
105
+ target_modules=["q_proj", "k_proj", "v_proj", "o_proj"] # Mistral attention modules
106
+ )
107
+
108
+ def fine_tune_model():
109
+ """Główna funkcja fine-tuningu"""
110
+
111
+ # 1. Przygotuj dane
112
+ print("🔄 Preparing training data...")
113
+ sample_data = create_sample_dataset()
114
+
115
+ # 2. Ładuj model
116
+ model, tokenizer = setup_model_and_tokenizer()
117
+
118
+ # 3. Setup LoRA
119
+ lora_config = setup_lora_config()
120
+ model = get_peft_model(model, lora_config)
121
+
122
+ print(f"📊 Trainable parameters: {model.print_trainable_parameters()}")
123
+
124
+ # 4. Przygotuj dataset
125
+ dataset = Dataset.from_list(sample_data)
126
+ formatted_dataset = dataset.map(
127
+ lambda x: format_training_data([x]),
128
+ remove_columns=dataset.column_names
129
+ )
130
+
131
+ # Tokenizacja
132
+ def tokenize_function(examples):
133
+ return tokenizer(
134
+ examples["text"],
135
+ truncation=True,
136
+ padding="max_length",
137
+ max_length=512,
138
+ return_tensors="pt"
139
+ )
140
+
141
+ tokenized_dataset = formatted_dataset.map(tokenize_function, batched=True)
142
+
143
+ # 5. Training arguments - optymalizowane dla RTX 3050
144
+ training_args = TrainingArguments(
145
+ output_dir="./results",
146
+ num_train_epochs=3,
147
+ per_device_train_batch_size=1, # Mały batch size dla RTX 3050
148
+ gradient_accumulation_steps=4,
149
+ warmup_steps=10,
150
+ learning_rate=2e-4,
151
+ fp16=True, # Mixed precision
152
+ logging_steps=1,
153
+ save_strategy="epoch",
154
+ evaluation_strategy="no",
155
+ dataloader_num_workers=0, # Avoid multiprocessing issues
156
+ remove_unused_columns=False,
157
+ )
158
+
159
+ # 6. Trainer
160
+ trainer = Trainer(
161
+ model=model,
162
+ args=training_args,
163
+ train_dataset=tokenized_dataset,
164
+ tokenizer=tokenizer,
165
+ )
166
+
167
+ # 7. Train!
168
+ print("🚀 Starting fine-tuning...")
169
+ trainer.train()
170
+
171
+ # 8. Save model
172
+ model.save_pretrained("./fine_tuned_model")
173
+ tokenizer.save_pretrained("./fine_tuned_model")
174
+
175
+ print("✅ Fine-tuning completed! Model saved to ./fine_tuned_model")
176
+
177
+ return model, tokenizer
178
+
179
+ # === 3. KONWERSJA DO GGUF ===
180
+
181
+ def convert_to_gguf():
182
+ """Konwertuje model do formatu GGUF dla Ollama"""
183
+
184
+ print("🔄 Converting to GGUF format...")
185
+
186
+ # Ten skrypt wymaga llama.cpp
187
+ conversion_script = """
188
+ #!/bin/bash
189
+
190
+ # Pobierz llama.cpp jeśli nie masz
191
+ if [ ! -d "llama.cpp" ]; then
192
+ git clone https://github.com/ggerganov/llama.cpp.git
193
+ cd llama.cpp
194
+ make -j
195
+ cd ..
196
+ fi
197
+
198
+ # Konwertuj model
199
+ python llama.cpp/convert.py ./fine_tuned_model --outtype f16 --outfile my_custom_model.gguf
200
+
201
+ echo "✅ GGUF conversion completed: my_custom_model.gguf"
202
+ """
203
+
204
+ with open("convert_to_gguf.sh", "w") as f:
205
+ f.write(conversion_script)
206
+
207
+ os.chmod("convert_to_gguf.sh", 0o755)
208
+
209
+ print("📝 Created convert_to_gguf.sh script")
210
+ print("Run: ./convert_to_gguf.sh")
211
+
212
+ # === 4. TWORZENIE MODELFILE DLA OLLAMA ===
213
+
214
+ def create_ollama_modelfile():
215
+ """Tworzy Modelfile dla Ollama"""
216
+
217
+ modelfile_content = '''FROM ./my_custom_model.gguf
218
+
219
+ # Model metadata
220
+ PARAMETER temperature 0.7
221
+ PARAMETER top_p 0.9
222
+ PARAMETER top_k 40
223
+ PARAMETER num_ctx 2048
224
+
225
+ # System prompt
226
+ SYSTEM "Jesteś pomocnym asystentem AI stworzonym specjalnie dla polskich użytkowników.\nOdpowiadasz w języku polskim, jesteś precyzyjny i pomocny.\nSpecjalizujesz się w programowaniu, technologii i sztucznej inteligencji."
227
+
228
+ # Chat template dla Mistral
229
+ TEMPLATE "<s>[INST] {{ if .System }}{{ .System }}{{ end }}{{ .Prompt }} [/INST] {{ .Response }}</s>"
230
+
231
+ # Metadata
232
+ PARAMETER num_predict 256
233
+ PARAMETER stop "<s>"
234
+ PARAMETER stop "[INST]"
235
+ PARAMETER stop "[/INST]"
236
+ '''
237
+
238
+ with open("Modelfile", "w", encoding="utf-8") as f:
239
+ f.write(modelfile_content)
240
+ print("✅ Utworzono Modelfile dla Ollama")
241
+ print("✅ Created Modelfile for Ollama")
242
+
243
+ # === 5. PUBLIKACJA MODELU ===
244
+
245
+ def create_model_in_ollama():
246
+ """Tworzy model w Ollama"""
247
+
248
+ ollama_commands = """
249
+ # 1. Utwórz model w Ollama
250
+ ollama create wronai -f Modelfile
251
+
252
+ # 2. Test modelu
253
+ ollama run wronai "Cześć! Kim jesteś?"
254
+
255
+ # 3. Push do Ollama Library (wymaga konta)
256
+ ollama push wronai
257
+
258
+ # 4. Alternatywnie - export do pliku
259
+ ollama save wronai wronai-model.tar
260
+ """
261
+
262
+ with open("ollama_commands.sh", "w") as f:
263
+ f.write(ollama_commands)
264
+
265
+ print("✅ Created ollama_commands.sh")
266
+
267
+ # === 6. PUBLIKACJA NA HUGGING FACE ===
268
+
269
+ def create_hf_publish_script():
270
+ """Skrypt do publikacji na Hugging Face"""
271
+
272
+ hf_script = '''#!/usr/bin/env python3
273
+ """
274
+ Publikacja modelu na Hugging Face Hub
275
+ """
276
+
277
+ from huggingface_hub import HfApi, create_repo
278
+ import os
279
+
280
+ def publish_to_hf():
281
+ # Konfiguracja
282
+ model_name = "your-username/my-custom-mistral-7b"
283
+
284
+ # Login (wymagany HF token)
285
+ # huggingface-cli login
286
+
287
+ # Utwórz repo
288
+ api = HfApi()
289
+
290
+ try:
291
+ create_repo(
292
+ repo_id=model_name,
293
+ repo_type="model",
294
+ private=False # Ustaw True dla prywatnego
295
+ )
296
+ print(f"✅ Repository created: {model_name}")
297
+ except Exception as e:
298
+ print(f"Repository may already exist: {e}")
299
+
300
+ # Upload plików
301
+ api.upload_folder(
302
+ folder_path="./fine_tuned_model",
303
+ repo_id=model_name,
304
+ commit_message="Initial model upload"
305
+ )
306
+
307
+ # Upload the GGUF file (if it exists)
308
+ if os.path.exists("my_custom_model.gguf"):
309
+ api.upload_file(
310
+ path_or_fileobj="my_custom_model.gguf",
311
+ path_in_repo="my_custom_model.gguf",
312
+ repo_id=model_name,
313
+ commit_message="Add GGUF version"
314
+ )
315
+
316
+ print(f"🎉 Model published: https://huggingface.co/{model_name}")
317
+
318
+ if __name__ == "__main__":
319
+ publish_to_hf()
320
+ '''
321
+
322
+ with open("publish_to_hf.py", "w") as f:
323
+ f.write(hf_script)
324
+
325
+ print("✅ Created publish_to_hf.py")
326
+
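+ # Editor's note: publishing requires an authenticated Hugging Face session, e.g.
+ # `huggingface-cli login` or, in Python, `from huggingface_hub import login; login()`.
+ # The repo id "your-username/my-custom-mistral-7b" in the generated script is a
+ # placeholder and must be changed before running it.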
327
+ # === MAIN FUNCTION ===
328
+
329
+ def main():
330
+ """Pełny pipeline tworzenia własnego modelu"""
331
+
332
+ print("🎯 Custom LLM Creation Pipeline")
333
+ print("===============================")
334
+
335
+ choice = input("""
336
+ Choose an option:
337
+ 1. Create a sample dataset
338
+ 2. Fine-tune the model (requires a GPU)
339
+ 3. Convert to GGUF
340
+ 4. Create a Modelfile for Ollama
341
+ 5. Prepare the publishing scripts
342
+ 6. Full pipeline (1-5)
343
+
344
+ Choice (1-6): """).strip()
345
+
346
+ if choice == "1":
347
+ create_sample_dataset()
348
+ elif choice == "2":
349
+ fine_tune_model()
350
+ elif choice == "3":
351
+ convert_to_gguf()
352
+ elif choice == "4":
353
+ create_ollama_modelfile()
354
+ elif choice == "5":
355
+ create_hf_publish_script()
356
+ elif choice == "6":
357
+ print("🚀 Running full pipeline...")
358
+ create_sample_dataset()
359
+
360
+ if input("Continue with fine-tuning? (y/N): ").lower() == 'y':
361
+ fine_tune_model()
362
+ convert_to_gguf()
363
+
364
+ create_ollama_modelfile()
365
+ create_model_in_ollama()
366
+ create_hf_publish_script()
367
+
368
+ print("✅ Full pipeline completed!")
369
+ else:
370
+ print("Invalid choice")
371
+
372
+ if __name__ == "__main__":
373
+ main()
docker-compose.yml ADDED
@@ -0,0 +1,76 @@
1
+ version: '3.8'
2
+
3
+ # Load environment variables from .env file
4
+ x-env: &env
5
+ env_file: .env
6
+ environment:
7
+ - OLLAMA_PORT=${OLLAMA_PORT:-11434}
8
+ - OLLAMA_HOST=${OLLAMA_HOST:-0.0.0.0}
9
+ - STREAMLIT_SERVER_PORT=${STREAMLIT_SERVER_PORT:-8501}
10
+ - STREAMLIT_SERVER_ADDRESS=${STREAMLIT_SERVER_ADDRESS:-0.0.0.0}
11
+ - DEFAULT_MODEL=${DEFAULT_MODEL:-mistral:7b-instruct}
12
+
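+ # Editor's note: YAML merge keys give explicitly set keys precedence, so services
+ # below that define their own `environment:` (streamlit-ui, model-setup) replace the
+ # anchor's environment list rather than extending it; only env_file is shared.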
13
+ services:
14
+ # Ollama - LLM engine (minimal configuration)
15
+ ollama:
16
+ image: ollama/ollama:latest
17
+ container_name: ollama-engine
18
+ <<: *env
19
+ ports:
20
+ - "${OLLAMA_PORT:-11434}:11434"
21
+ volumes:
22
+ - ollama_data:/root/.ollama
23
+ # deploy:
24
+ # resources:
25
+ # reservations:
26
+ # devices:
27
+ # - driver: nvidia
28
+ # count: 1
29
+ # capabilities: [gpu]
30
+ restart: unless-stopped
31
+
32
+ # Streamlit UI (Python frontend)
33
+ streamlit-ui:
34
+ build: .
35
+ container_name: llm-ui
36
+ <<: *env
37
+ ports:
38
+ - "${STREAMLIT_SERVER_PORT:-8501}:8501"
39
+ depends_on:
40
+ - ollama
41
+ environment:
42
+ - OLLAMA_URL=http://ollama:${OLLAMA_PORT:-11434}
43
+ - STREAMLIT_SERVER_PORT=${STREAMLIT_SERVER_PORT:-8501}
44
+ - STREAMLIT_SERVER_ADDRESS=${STREAMLIT_SERVER_ADDRESS:-0.0.0.0}
45
+ restart: unless-stopped
46
+ volumes:
47
+ - ./app:/app
48
+ healthcheck:
49
+ test: ["CMD", "curl", "-f", "http://localhost:${STREAMLIT_SERVER_PORT:-8501}/_stcore/health"]
50
+ interval: 30s
51
+ timeout: 10s
52
+ retries: 3
53
+
54
+ # Model Setup (model initialization)
55
+ model-setup:
56
+ image: ollama/ollama:latest
57
+ container_name: model-setup
58
+ <<: *env
59
+ depends_on:
60
+ - ollama
61
+ volumes:
62
+ - ollama_data:/root/.ollama
63
+ environment:
64
+ - OLLAMA_HOST=http://ollama:${OLLAMA_PORT:-11434}
65
+ - DEFAULT_MODEL=${DEFAULT_MODEL:-mistral:7b-instruct}
66
+ command: >
67
+ sh -c "
68
+ echo 'Waiting for Ollama to be ready...' &&
69
+ sleep 10 &&
70
+ ollama pull ${DEFAULT_MODEL:-mistral:7b-instruct} &&
71
+ echo 'Model loaded successfully!'
72
+ "
73
+ restart: "no"
74
+
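+ # Editor's note: the first run of model-setup downloads the default model
+ # (roughly 4 GB for mistral:7b-instruct), so it can take a while before the
+ # container exits successfully.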
75
+ volumes:
76
+ ollama_data:
example_training_data.json ADDED
@@ -0,0 +1,32 @@
1
+ [
2
+ {
3
+ "instruction": "Jak zoptymalizować kod Python dla machine learning?",
4
+ "input": "",
5
+ "output": "Kluczowe optymalizacje dla ML w Pythonie:\n\n1. **Vectoryzacja z NumPy/Pandas**:\n - Unikaj pętli Python, używaj operacji wektorowych\n - `df.apply()` zamiast iteracji przez wiersze\n\n2. **Wykorzystanie GPU**:\n - PyTorch/TensorFlow z CUDA\n - CuPy dla operacji NumPy na GPU\n\n3. **Multiprocessing**:\n - `multiprocessing.Pool` dla CPU-intensive tasks\n - `joblib.Parallel` dla sklearn\n\n4. **Memory management**:\n - `del` niepotrzebnych zmiennych\n - `gc.collect()` dla garbage collection\n - Chunking dla dużych datasets\n\n5. **Profiling**:\n - `cProfile` do znajdowania bottlenecków\n - `memory_profiler` dla użycia pamięci"
6
+ },
7
+ {
8
+ "instruction": "Wyjaśnij Docker dla początkujących",
9
+ "input": "chcę zrozumieć podstawy",
10
+ "output": "Docker to narzędzie do konteneryzacji aplikacji. Podstawowe koncepty:\n\n**Co to jest kontener?**\n- Lekka, przenośna \"paczka\" z aplikacją i wszystkimi jej zależnościami\n- Działa identycznie na każdym systemie\n\n**Podstawowe komendy:**\n```bash\n# Uruchom kontener\ndocker run nginx\n\n# Lista działających kontenerów\ndocker ps\n\n# Zbuduj obraz z Dockerfile\ndocker build -t myapp .\n\n# Zatrzymaj kontener\ndocker stop <container_id>\n```\n\n**Dockerfile przykład:**\n```dockerfile\nFROM python:3.11\nCOPY . /app\nWORKDIR /app\nRUN pip install -r requirements.txt\nCMD [\"python\", \"app.py\"]\n```\n\n**Zalety:**\n- Spójność środowiska (dev = prod)\n- Łatwe deployowanie\n- Izolacja aplikacji\n- Skalowalność"
11
+ },
12
+ {
13
+ "instruction": "Jak stworzyć REST API w FastAPI?",
14
+ "input": "podstawowy przykład",
15
+ "output": "Oto kompletny przykład REST API w FastAPI:\n\n```python\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\nfrom typing import List, Optional\n\napp = FastAPI(title=\"My API\", version=\"1.0.0\")\n\n# Model danych\nclass Item(BaseModel):\n id: Optional[int] = None\n name: str\n price: float\n description: Optional[str] = None\n\n# \"Baza danych\"\nitems = []\n\n# Endpoints\n@app.get(\"/\")\ndef read_root():\n return {\"message\": \"Hello World\"}\n\n@app.get(\"/items\", response_model=List[Item])\ndef get_items():\n return items\n\n@app.get(\"/items/{item_id}\", response_model=Item)\ndef get_item(item_id: int):\n item = next((x for x in items if x.id == item_id), None)\n if not item:\n raise HTTPException(status_code=404, detail=\"Item not found\")\n return item\n\n@app.post(\"/items\", response_model=Item)\ndef create_item(item: Item):\n item.id = len(items) + 1\n items.append(item)\n return item\n\n@app.put(\"/items/{item_id}\", response_model=Item)\ndef update_item(item_id: int, updated_item: Item):\n for i, item in enumerate(items):\n if item.id == item_id:\n updated_item.id = item_id\n items[i] = updated_item\n return updated_item\n raise HTTPException(status_code=404, detail=\"Item not found\")\n\n@app.delete(\"/items/{item_id}\")\ndef delete_item(item_id: int):\n global items\n items = [x for x in items if x.id != item_id]\n return {\"message\": \"Item deleted\"}\n\n# Uruchomienie\nif __name__ == \"__main__\":\n import uvicorn\n uvicorn.run(app, host=\"0.0.0.0\", port=8000)\n```\n\n**Uruchomienie:**\n```bash\npip install fastapi uvicorn\npython main.py\n```\n\n**Automatyczna dokumentacja:**\n- Swagger UI: http://localhost:8000/docs\n- ReDoc: http://localhost:8000/redoc"
16
+ },
17
+ {
18
+ "instruction": "Wyjaśnij różnice między AI, ML i Deep Learning",
19
+ "input": "",
20
+ "output": "**Hierarchia pojęć (od najszerszego):**\n\n🧠 **Artificial Intelligence (AI)**\n- Najszersze pojęcie\n- Systemy zdolne do wykonywania zadań wymagających \"inteligencji\"\n- Przykłady: chatboty, systemy eksperckie, algorytmy gier\n\n🤖 **Machine Learning (ML) ⊂ AI**\n- Podzbiór AI\n- Algorytmy uczące się z danych bez jawnego programowania\n- Główne typy:\n - **Supervised**: uczenie z etykietowanymi danymi\n - **Unsupervised**: znajdowanie wzorców w danych\n - **Reinforcement**: uczenie przez nagrody/kary\n\n🧬 **Deep Learning (DL) ⊂ ML**\n- Podzbiór ML\n- Sieci neuronowe z wieloma warstwami (\"głębokie\")\n- Automatyczne wyodrębnianie cech z danych\n- Przykłady: CNN (obrazy), RNN (sekwencje), Transformers (tekst)\n\n**Praktyczne różnice:**\n\n| Aspekt | AI | ML | Deep Learning |\n|--------|----|----|---------------|\n| **Dane** | Reguły+dane | Duże zbiory | Bardzo duże zbiory |\n| **Wydajność** | Zależna od reguł | Dobra | Najlepsza (z danymi) |\n| **Interpretacja** | Jasna | Średnia | Trudna (black box) |\n| **Moc obliczeniowa** | Niska | Średnia | Wysoka |\n\n**Przykłady:**\n- **AI bez ML**: System ekspercki w medycynie (reguły if-then)\n- **ML bez DL**: Drzewo decyzyjne, SVM, regresja\n- **Deep Learning**: GPT, BERT, rozpoznawanie obrazów"
21
+ },
22
+ {
23
+ "instruction": "Jak debugging w Pythonie?",
24
+ "input": "najlepsze praktyki",
25
+ "output": "**Kompletny guide do debuggingu w Pythonie:**\n\n## 1. **Print debugging (podstawa)**\n```python\n# Klasyczne print\nprint(f\"Zmienna x = {x}, typ: {type(x)}\")\n\n# Lepsze print z kontekstem\nprint(f\"DEBUG [funkcja_name:42]: x={x}\")\n\n# Print z formatowaniem JSON\nimport json\nprint(json.dumps(data, indent=2, ensure_ascii=False))\n```\n\n## 2. **Python Debugger (pdb)**\n```python\n# Wstaw breakpoint\nimport pdb; pdb.set_trace()\n\n# Python 3.7+\nbreakpoint() # Equivalent do pdb.set_trace()\n\n# Komendy w pdb:\n# n (next line)\n# s (step into)\n# c (continue)\n# l (list code)\n# p variable_name (print variable)\n# pp variable_name (pretty print)\n# q (quit)\n```\n\n## 3. **Logging (profesjonalne)**\n```python\nimport logging\n\n# Konfiguracja\nlogging.basicConfig(\n level=logging.DEBUG,\n format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'\n)\n\nlogger = logging.getLogger(__name__)\n\n# Użycie\nlogger.debug(\"Szczegółowe info dla debuggingu\")\nlogger.info(\"Ogólne informacje\")\nlogger.warning(\"Ostrzeżenie\")\nlogger.error(\"Błąd\")\nlogger.critical(\"Krytyczny błąd\")\n```\n\n## 4. **IDE Debuggers**\n- **VS Code**: F5 → Run and Debug\n- **PyCharm**: Klik na marginesie → Add Breakpoint\n- **Jupyter**: `%debug` magic command\n\n## 5. **Specialized Tools**\n```python\n# Memory profiling\nfrom memory_profiler import profile\n\n@profile\ndef my_function():\n # kod do profilowania\n pass\n\n# Performance profiling\nimport cProfile\ncProfile.run('my_function()')\n\n# Exception debugging\nimport traceback\ntry:\n risky_code()\nexcept Exception as e:\n traceback.print_exc()\n # lub logger.exception(\"Error occurred\")\n```\n\n## 6. **Best Practices**\n- **Używaj assertions**: `assert x > 0, \"x must be positive\"`\n- **Type hints**: Pomaga IDE w debuggingu\n- **Unit testy**: Wczesne wykrywanie błędów\n- **Linting**: `flake8`, `pylint`, `black`\n- **Exception handling**: Zawsze obsługuj wyjątki\n\n## 7. **Quick Debug Tricks**\n```python\n# Inspect object\nvars(object) # Wszystkie atrybuty\ndir(object) # Metody i atrybuty\n\n# Check if variable exists\nif 'variable' in locals():\n print(variable)\n\n# Pretty print data structures\nfrom pprint import pprint\npprint(complex_dict)\n```"
26
+ },
27
+ {
28
+ "instruction": "Jak zacząć z RTX 3050 i machine learning?",
29
+ "input": "pierwszy projekt",
30
+ "output": "**Kompletny przewodnik ML na RTX 3050:**\n\n## 🎯 **Setup środowiska**\n\n```bash\n# 1. CUDA i cuDNN\n# Pobierz CUDA 11.8 z nvidia.com\n# Sprawdź: nvidia-smi\n\n# 2. Python environment\nconda create -n ml python=3.10\nconda activate ml\n\n# 3. PyTorch z CUDA\npip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118\n\n# 4. ML libraries\npip install pandas numpy matplotlib scikit-learn jupyter\n\n# Test GPU\npython -c \"import torch; print(f'CUDA: {torch.cuda.is_available()}')\"\n```\n\n## 🚀 **Pierwszy projekt: Klasyfikacja obrazów**\n\n```python\nimport torch\nimport torch.nn as nn\nimport torchvision\nfrom torch.utils.data import DataLoader\nimport matplotlib.pyplot as plt\n\n# 1. Sprawdź GPU\ndevice = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\nprint(f'Using device: {device}')\n\n# 2. Dataset (CIFAR-10)\ntransform = torchvision.transforms.Compose([\n torchvision.transforms.ToTensor(),\n torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n])\n\ntrain_dataset = torchvision.datasets.CIFAR10(\n root='./data', train=True, download=True, transform=transform\n)\ntest_dataset = torchvision.datasets.CIFAR10(\n root='./data', train=False, download=True, transform=transform\n)\n\n# 3. DataLoaders (optymalizowane dla RTX 3050)\ntrain_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)\ntest_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)\n\n# 4. Prosty CNN model\nclass SimpleCNN(nn.Module):\n def __init__(self):\n super(SimpleCNN, self).__init__()\n self.conv1 = nn.Conv2d(3, 32, 3, padding=1)\n self.conv2 = nn.Conv2d(32, 64, 3, padding=1)\n self.pool = nn.MaxPool2d(2, 2)\n self.fc1 = nn.Linear(64 * 8 * 8, 512)\n self.fc2 = nn.Linear(512, 10)\n self.dropout = nn.Dropout(0.5)\n\n def forward(self, x):\n x = self.pool(torch.relu(self.conv1(x)))\n x = self.pool(torch.relu(self.conv2(x)))\n x = x.view(-1, 64 * 8 * 8)\n x = torch.relu(self.fc1(x))\n x = self.dropout(x)\n x = self.fc2(x)\n return x\n\n# 5. Training setup\nmodel = SimpleCNN().to(device)\ncriterion = nn.CrossEntropyLoss()\noptimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n\n# 6. Training loop\nnum_epochs = 5\nfor epoch in range(num_epochs):\n model.train()\n running_loss = 0.0\n \n for i, (images, labels) in enumerate(train_loader):\n images, labels = images.to(device), labels.to(device)\n \n optimizer.zero_grad()\n outputs = model(images)\n loss = criterion(outputs, labels)\n loss.backward()\n optimizer.step()\n \n running_loss += loss.item()\n \n if (i+1) % 100 == 0:\n print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}], Loss: {loss.item():.4f}')\n \n print(f'Epoch [{epoch+1}] Average Loss: {running_loss/len(train_loader):.4f}')\n\n# 7. Evaluation\nmodel.eval()\ncorrect = 0\ntotal = 0\n\nwith torch.no_grad():\n for images, labels in test_loader:\n images, labels = images.to(device), labels.to(device)\n outputs = model(images)\n _, predicted = torch.max(outputs.data, 1)\n total += labels.size(0)\n correct += (predicted == labels).sum().item()\n\nprint(f'Test Accuracy: {100 * correct / total:.2f}%')\n```\n\n## ⚡ **Optymalizacje dla RTX 3050**\n\n```python\n# 1. Mixed Precision Training (oszczędza VRAM)\nfrom torch.cuda.amp import autocast, GradScaler\n\nscaler = GradScaler()\n\n# W training loop:\nwith autocast():\n outputs = model(images)\n loss = criterion(outputs, labels)\n\nscaler.scale(loss).backward()\nscaler.step(optimizer)\nscaler.update()\n\n# 2. 
Gradient Checkpointing\nmodel.gradient_checkpointing_enable()\n\n# 3. DataLoader optymalizacje\ntrain_loader = DataLoader(\n dataset, \n batch_size=32, # Zmniejsz jeśli brakuje VRAM\n num_workers=2, # 2-4 dla RTX 3050\n pin_memory=True, # Szybszy transfer CPU->GPU\n persistent_workers=True # PyTorch 1.7+\n)\n```\n\n## 📊 **Monitoring GPU**\n\n```python\n# Memory monitoring\ndef print_gpu_memory():\n if torch.cuda.is_available():\n print(f'GPU Memory: {torch.cuda.memory_allocated()/1024**3:.1f}GB / {torch.cuda.max_memory_allocated()/1024**3:.1f}GB')\n torch.cuda.reset_peak_memory_stats()\n\n# Użyj po każdej epoce\nprint_gpu_memory()\n```\n\n## 🎯 **Następne projekty:**\n\n1. **NLP**: Fine-tuning BERT na polskich tekstach\n2. **Computer Vision**: Object detection z YOLO\n3. **Time Series**: Predykcja cen akcji z LSTM\n4. **Generative**: Treninig mini-GAN\n5. **Reinforcement Learning**: Gra w Atari\n\n## 💡 **Tips dla RTX 3050:**\n\n- **Batch size**: Zacznij od 32-64, dostosuj do VRAM\n- **Model size**: Używaj mniejszych modeli (ResNet18 zamiast ResNet152)\n- **Mixed precision**: Zawsze włączaj dla większych modeli\n- **Transfer learning**: Używaj pre-trained modeli zamiast trenować od zera\n- **Gradient accumulation**: Jeśli musisz mały batch size\n\n**RTX 3050 = świetna karta do nauki ML! 8GB VRAM wystarczy na większość projektów edukacyjnych.**"
31
+ }
32
+ ]
model_requirements.txt ADDED
@@ -0,0 +1,19 @@
1
+ # Fine-tuning requirements
2
+ torch>=2.2.2
3
+ transformers>=4.39.0
4
+ datasets>=2.18.0
5
+ peft>=0.10.0
6
+ accelerate>=0.29.0
7
+ bitsandbytes>=0.43.0
8
+ huggingface_hub>=0.22.0
9
+
10
+ # For data processing
11
+ pandas>=2.2.0
12
+ numpy>=1.26.0
13
+
14
+ # For model publishing
15
+ huggingface_hub[cli]>=0.22.0
16
+
17
+ # Optional: for advanced training
18
+ wandb>=0.17.0
19
+ tensorboard>=2.16.0
quick-start.sh ADDED
@@ -0,0 +1,131 @@
1
+ #!/bin/bash
2
+
3
+ # 🚀 Minimal LLM Setup - Everything in one script!
4
+
5
+ set -e
6
+
7
+ # Colors
8
+ RED='\033[0;31m'
9
+ GREEN='\033[0;32m'
10
+ YELLOW='\033[1;33m'
11
+ BLUE='\033[0;34m'
12
+ NC='\033[0m' # No Color
13
+
14
+ echo -e "${BLUE}🚀 Minimal LLM Setup${NC}"
15
+ echo "===================="
16
+
17
+ # Check Docker
18
+ if ! command -v docker &> /dev/null; then
19
+ echo -e "${RED}❌ Docker not found. Please install Docker first.${NC}"
20
+ exit 1
21
+ fi
22
+
23
+ # Check Docker Compose
24
+ if ! docker compose version &> /dev/null; then
25
+ echo -e "${RED}❌ Docker Compose not found. Please install Docker Compose.${NC}"
26
+ exit 1
27
+ fi
28
+
29
+ # Check NVIDIA Docker (optional)
30
+ if docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi &>/dev/null; then
31
+ echo -e "${GREEN}✅ NVIDIA Docker detected${NC}"
32
+ GPU_AVAILABLE=true
33
+ else
34
+ echo -e "${YELLOW}⚠️ No GPU detected, running on CPU${NC}"
35
+ GPU_AVAILABLE=false
36
+ fi
37
+
38
+ # Create project structure
39
+ echo -e "${BLUE}📁 Creating project structure...${NC}"
40
+ mkdir -p app
41
+
42
+ # Create minimal Streamlit app if it doesn't exist
43
+ if [ ! -f "app/main.py" ]; then
44
+ echo -e "${BLUE}📝 Creating Streamlit app...${NC}"
45
+ # The file content would be copied here in real scenario
46
+ echo "# Streamlit app created. Copy the main.py content here."
47
+ fi
48
+
49
+ # Modify docker-compose for CPU if no GPU
50
+ if [ "$GPU_AVAILABLE" = false ]; then
51
+ echo -e "${YELLOW}🔧 Configuring for CPU mode...${NC}"
52
+ sed -i 's/deploy:/# deploy:/g' docker-compose.yml || true
53
+ sed -i 's/resources:/# resources:/g' docker-compose.yml || true
54
+ sed -i 's/reservations:/# reservations:/g' docker-compose.yml || true
55
+ sed -i 's/devices:/# devices:/g' docker-compose.yml || true
56
+ sed -i 's/- driver: nvidia/# - driver: nvidia/g' docker-compose.yml || true
57
+ sed -i 's/count: 1/# count: 1/g' docker-compose.yml || true
58
+ sed -i 's/capabilities: \[gpu\]/# capabilities: [gpu]/g' docker-compose.yml || true
59
+ fi
60
+
61
+ # Build and start services
62
+ echo -e "${BLUE}🔨 Building and starting services...${NC}"
63
+ docker compose up --build -d
64
+
65
+ # Wait for services
66
+ echo -e "${BLUE}⏳ Waiting for services to start...${NC}"
67
+
68
+ # Wait for Ollama
69
+ echo -n "Waiting for Ollama"
70
+ for i in {1..30}; do
71
+ if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
72
+ echo -e "${GREEN} ✅${NC}"
73
+ break
74
+ fi
75
+ echo -n "."
76
+ sleep 2
77
+ done
78
+
79
+ # Wait for Streamlit
80
+ echo -n "Waiting for Streamlit"
81
+ for i in {1..30}; do
82
+ if curl -s http://localhost:8501/_stcore/health > /dev/null 2>&1; then
83
+ echo -e "${GREEN} ✅${NC}"
84
+ break
85
+ fi
86
+ echo -n "."
87
+ sleep 2
88
+ done
89
+
90
+ # Check if model download completed
91
+ echo -e "${BLUE}📥 Checking model download...${NC}"
92
+ docker logs model-setup | tail -5
93
+
94
+ echo
95
+ echo -e "${GREEN}🎉 Setup completed!${NC}"
96
+ echo "==================="
97
+ echo
98
+ echo -e "${BLUE}📍 Access points:${NC}"
99
+ echo " • Streamlit UI: http://localhost:8501"
100
+ echo " • Ollama API: http://localhost:11434"
101
+ echo
102
+ echo -e "${BLUE}🔍 Useful commands:${NC}"
103
+ echo " • Check logs: docker compose logs -f"
104
+ echo " • Stop services: docker compose down"
105
+ echo " • Restart: docker compose restart"
106
+ echo " • Shell access: docker exec -it ollama-engine bash"
107
+ echo
108
+ echo -e "${BLUE}🧪 Test API:${NC}"
109
+ echo ' curl -X POST http://localhost:11434/api/generate \'
110
+ echo ' -H "Content-Type: application/json" \'
111
+ echo ' -d '"'"'{"model": "mistral:7b-instruct", "prompt": "Hello!"}'\'
112
+
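+ # Editor's note: the newer /api/chat endpoint works the same way, e.g.:
+ #   curl -X POST http://localhost:11434/api/chat \
+ #     -d '{"model": "mistral:7b-instruct", "messages": [{"role": "user", "content": "Hello!"}]}'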
113
+ # Auto-open browser (optional)
114
+ if command -v xdg-open &> /dev/null; then
115
+ echo
116
+ read -p "Open browser automatically? (y/N): " -n 1 -r
117
+ echo
118
+ if [[ $REPLY =~ ^[Yy]$ ]]; then
119
+ xdg-open http://localhost:8501
120
+ fi
121
+ elif command -v open &> /dev/null; then
122
+ echo
123
+ read -p "Open browser automatically? (y/N): " -n 1 -r
124
+ echo
125
+ if [[ $REPLY =~ ^[Yy]$ ]]; then
126
+ open http://localhost:8501
127
+ fi
128
+ fi
129
+
130
+ echo
131
+ echo -e "${GREEN}Happy chatting! 🤖${NC}"
requirements.txt ADDED
@@ -0,0 +1,3 @@
1
+ streamlit==1.29.0
2
+ requests==2.31.0
3
+ ollama==0.1.7
test_converted_model.py ADDED
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ 🧪 Test script for converted GGUF model
4
+ Tests both llama.cpp and Ollama integration
5
+ """
6
+
7
+ import os
8
+ import subprocess
9
+ import time
10
+ import requests
11
+ import json
12
+ from pathlib import Path
13
+
14
+
15
+ def test_llamacpp_direct():
16
+ """Test model directly with llama.cpp"""
17
+ print("🧪 Testing with llama.cpp directly...")
18
+
19
+ model_file = "my_custom_model.gguf"
20
+ if not os.path.exists(model_file):
21
+ print(f"❌ Model file not found: {model_file}")
22
+ return False
23
+
24
+ llamacpp_main = "./llama.cpp/main"
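+ # Editor's note: newer llama.cpp builds name this binary "llama-cli" (often under
+ # build/bin/); adjust llamacpp_main if a plain "main" binary is not produced.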
25
+ if not os.path.exists(llamacpp_main):
26
+ print(f"❌ llama.cpp main not found: {llamacpp_main}")
27
+ print("Run: ./convert_to_gguf.sh first")
28
+ return False
29
+
30
+ test_prompts = [
31
+ "Hello, how are you?",
32
+ "Wyjaśnij co to jest Docker",
33
+ "Napisz prostą funkcję w Pythonie"
34
+ ]
35
+
36
+ for i, prompt in enumerate(test_prompts, 1):
37
+ print(f"\n--- Test {i}/3: {prompt[:30]}... ---")
38
+
39
+ cmd = [
40
+ llamacpp_main,
41
+ "-m", model_file,
42
+ "-p", prompt,
43
+ "-n", "100",
44
+ "--temp", "0.7",
45
+ "--top-p", "0.9"
46
+ ]
47
+
48
+ try:
49
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
50
+ if result.returncode == 0:
51
+ print("✅ Response generated successfully")
52
+ print("Response preview:", result.stdout[:200] + "..." if len(result.stdout) > 200 else result.stdout)
53
+ else:
54
+ print(f"❌ Error: {result.stderr}")
55
+ return False
56
+ except subprocess.TimeoutExpired:
57
+ print("⏰ Timeout - model may be too slow")
58
+ return False
59
+ except Exception as e:
60
+ print(f"❌ Exception: {e}")
61
+ return False
62
+
63
+ return True
64
+
65
+
66
+ def test_ollama_integration():
67
+ """Test model through Ollama"""
68
+ print("\n🤖 Testing Ollama integration...")
69
+
70
+ # Check if Ollama is installed
71
+ try:
72
+ result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
73
+ if result.returncode != 0:
74
+ print("❌ Ollama not installed or not running")
75
+ return False
76
+ except FileNotFoundError:
77
+ print("❌ Ollama command not found")
78
+ return False
79
+
80
+ model_name = "my-custom-model"
81
+
82
+ # Check if our custom model exists in Ollama
83
+ if model_name not in result.stdout:
84
+ print(f"⚠️ Model '{model_name}' not found in Ollama")
85
+ print("Create it first:")
86
+ print("1. ollama create my-custom-model -f Modelfile")
87
+ return False
88
+
89
+ print(f"✅ Found model: {model_name}")
90
+
91
+ # Test through Ollama API
92
+ test_prompts = [
93
+ "Cześć! Kim jesteś?",
94
+ "Jak zoptymalizować kod Python?",
95
+ "Co to jest machine learning?"
96
+ ]
97
+
98
+ for i, prompt in enumerate(test_prompts, 1):
99
+ print(f"\n--- Ollama Test {i}/3: {prompt[:30]}... ---")
100
+
101
+ try:
102
+ # Test via CLI
103
+ cmd = ["ollama", "run", model_name, prompt]
104
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
105
+
106
+ if result.returncode == 0:
107
+ print("✅ Ollama CLI response successful")
108
+ print("Response preview:", result.stdout[:200] + "..." if len(result.stdout) > 200 else result.stdout)
109
+ else:
110
+ print(f"❌ Ollama CLI error: {result.stderr}")
111
+ continue
112
+
113
+ except subprocess.TimeoutExpired:
114
+ print("⏰ Ollama timeout")
115
+ continue
116
+ except Exception as e:
117
+ print(f"❌ Ollama exception: {e}")
118
+ continue
119
+
120
+ # Test via API
121
+ print("\n🌐 Testing Ollama API...")
122
+ try:
123
+ api_url = "http://localhost:11434/api/generate"
124
+ test_data = {
125
+ "model": model_name,
126
+ "prompt": "Hello! Test API call.",
127
+ "stream": False
128
+ }
129
+
130
+ response = requests.post(api_url, json=test_data, timeout=60)
131
+ if response.status_code == 200:
132
+ data = response.json()
133
+ print("✅ Ollama API response successful")
134
+ print("API Response:", data.get('response', 'No response field')[:100])
135
+ else:
136
+ print(f"❌ API Error: {response.status_code}")
137
+ return False
138
+
139
+ except requests.exceptions.RequestException as e:
140
+ print(f"❌ API Request failed: {e}")
141
+ return False
142
+
143
+ return True
144
+
145
+
146
+ def benchmark_model():
147
+ """Simple benchmark of the model"""
148
+ print("\n📊 Running simple benchmark...")
149
+
150
+ model_file = "my_custom_model.gguf"
151
+ if not os.path.exists(model_file):
152
+ print("❌ Model file not found for benchmark")
153
+ return
154
+
155
+ # Get file size
156
+ file_size = os.path.getsize(model_file) / (1024 ** 3) # GB
157
+ print(f"📁 Model size: {file_size:.2f} GB")
158
+
159
+ # Benchmark prompt
160
+ benchmark_prompt = "Explain artificial intelligence in simple terms."
161
+
162
+ llamacpp_main = "./llama.cpp/main"
163
+ if os.path.exists(llamacpp_main):
164
+ print("⏱️ Timing generation speed...")
165
+
166
+ cmd = [
167
+ llamacpp_main,
168
+ "-m", model_file,
169
+ "-p", benchmark_prompt,
170
+ "-n", "100",
171
+ "--temp", "0.7"
172
+ ]
173
+
174
+ start_time = time.time()
175
+ try:
176
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
177
+ end_time = time.time()
178
+
179
+ if result.returncode == 0:
180
+ duration = end_time - start_time
181
+ # Rough tokens estimation
182
+ tokens = len(result.stdout.split())
183
+ tokens_per_second = tokens / duration if duration > 0 else 0
184
+
185
+ print(f"⚡ Generation time: {duration:.2f} seconds")
186
+ print(f"🚀 Speed: ~{tokens_per_second:.1f} tokens/second")
187
+ print(f"📝 Generated tokens: ~{tokens}")
188
+ else:
189
+ print("❌ Benchmark failed")
190
+ except subprocess.TimeoutExpired:
191
+ print("⏰ Benchmark timeout")
192
+
193
+
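+ # Editor's note: llama.cpp also prints its own timing summary (prompt eval and eval
+ # tokens per second) to stderr, which is more precise than the word-count estimate above.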
194
+ def main():
195
+ """Main test runner"""
196
+ print("🧪 Custom Model Test Suite")
197
+ print("=" * 40)
198
+
199
+ # Check prerequisites
200
+ print("🔍 Checking prerequisites...")
201
+
202
+ required_files = [
203
+ "my_custom_model.gguf",
204
+ "./llama.cpp/main",
205
+ "Modelfile"
206
+ ]
207
+
208
+ missing_files = [f for f in required_files if not os.path.exists(f)]
209
+
210
+ if missing_files:
211
+ print("❌ Missing required files:")
212
+ for f in missing_files:
213
+ print(f" • {f}")
214
+ print("\nRun these commands first:")
215
+ print("1. python create_custom_model.py # fine-tune model")
216
+ print("2. ./convert_to_gguf.sh # convert to GGUF")
217
+ print("3. ollama create my-custom-model -f Modelfile # import to Ollama")
218
+ return
219
+
220
+ print("✅ All required files found")
221
+
222
+ # Run tests
223
+ tests_passed = 0
224
+ total_tests = 3
225
+
226
+ # Test 1: Direct llama.cpp
227
+ if test_llamacpp_direct():
228
+ tests_passed += 1
229
+ print("✅ llama.cpp test PASSED")
230
+ else:
231
+ print("❌ llama.cpp test FAILED")
232
+
233
+ # Test 2: Ollama integration
234
+ if test_ollama_integration():
235
+ tests_passed += 1
236
+ print("✅ Ollama test PASSED")
237
+ else:
238
+ print("❌ Ollama test FAILED")
239
+
240
+ # Test 3: Benchmark
241
+ benchmark_model()
242
+ tests_passed += 1 # Benchmark always "passes"
243
+
244
+ # Results
245
+ print("\n" + "=" * 40)
246
+ print(f"🎯 Test Results: {tests_passed}/{total_tests} passed")
247
+
248
+ if tests_passed == total_tests:
249
+ print("🎉 All tests passed! Your custom model is ready!")
250
+ print("\n🚀 Next steps:")
251
+ print("• ollama push my-custom-model # Share with the world")
252
+ print("• Integrate into your applications")
253
+ print("• Fine-tune further with more data")
254
+ else:
255
+ print("⚠️ Some tests failed. Check the output above.")
256
+
257
+ # Usage examples
258
+ print("\n📚 Usage Examples:")
259
+ print("# Ollama CLI:")
260
+ print("ollama run my-custom-model 'Your question here'")
261
+ print("\n# Ollama API:")
262
+ print("curl -X POST http://localhost:11434/api/generate \\")
263
+ print(" -H 'Content-Type: application/json' \\")
264
+ print(" -d '{\"model\": \"my-custom-model\", \"prompt\": \"Hello!\"}'")
265
+
266
+ print("\n# Python integration:")
267
+ print("import ollama")
268
+ print("response = ollama.chat(model='my-custom-model', messages=[")
269
+ print(" {'role': 'user', 'content': 'Hello!'}])")
270
+
271
+
272
+ if __name__ == "__main__":
273
+ main()