Commit 12101fc
Nikolay Ponomarev committed

1 Parent(s): d289510

finance help

Files changed:
- Dockerfile +14 -12
- app.py +123 -60
- start.sh +51 -30
Dockerfile CHANGED

```diff
@@ -5,7 +5,6 @@ RUN apt-get update && apt-get install -y \
     curl ca-certificates python3 python3-pip bash && \
     rm -rf /var/lib/apt/lists/*
 
-# Install Ollama
 RUN curl -fsSL https://ollama.com/install.sh | sh
 
 WORKDIR /app
@@ -16,24 +15,27 @@ COPY app.py /app/app.py
 COPY start.sh /app/start.sh
 RUN chmod +x /app/start.sh
 
-# Ollama server
 ENV OLLAMA_HOST=0.0.0.0:11434
-
-# CPU-friendly
-ENV OLLAMA_CONTEXT_LENGTH=4096
+ENV OLLAMA_CONTEXT_LENGTH=2048
 ENV OLLAMA_NUM_PARALLEL=1
+
+# How many models to keep loaded.
+# On CPU/16GB, 1 is better (otherwise it can eat the memory).
+ENV OLLAMA_MAX_LOADED_MODELS=3
 ENV OLLAMA_KEEP_ALIVE=10m
 
-# Gradio on Spaces
 ENV GRADIO_SERVER_NAME=0.0.0.0
 ENV GRADIO_SERVER_PORT=7860
 
-#
-ENV
-ENV
-ENV
-ENV
+# 3 models (can be overridden in Space Variables)
+ENV PIPELINE=multi
+ENV MODEL_NAME=qwen2.5-coder:1.5b
+ENV TRIAGE_MODEL=qwen2.5-coder:1.5b
+ENV ACTIONS_MODEL=qwen2.5-coder:3b
+ENV WRITER_MODEL=qwen2.5-coder:3b
+
+ENV NUM_CTX=2048
+ENV MAX_TOKENS=512
 ENV LITELLM_TIMEOUT=3600
 
 CMD ["/app/start.sh"]
```
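The `OLLAMA_MAX_LOADED_MODELS=3` / `OLLAMA_KEEP_ALIVE=10m` pair decides how many of the three stage models stay resident and for how long after last use. One way to observe the effect at runtime is Ollama's standard `/api/ps` endpoint; a minimal sketch, assuming the default port:

```python
import json
import urllib.request

BASE = "http://127.0.0.1:11434"

# /api/ps reports the models currently loaded in memory; expires_at
# reflects OLLAMA_KEEP_ALIVE (here: 10 minutes after last use).
with urllib.request.urlopen(BASE + "/api/ps", timeout=10) as resp:
    for m in json.load(resp).get("models", []):
        print(m.get("name"), "->", m.get("expires_at"))
```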
app.py CHANGED

```diff
@@ -10,26 +10,34 @@ from smolagents import LiteLLMModel
 
 warnings.filterwarnings("ignore", category=UserWarning, module="pydantic")
 
-MODEL_NAME = os.getenv("MODEL_NAME", "qwen2.5-coder:1.5b")
 OLLAMA_BASE = os.getenv("OLLAMA_URL", "http://127.0.0.1:11434").rstrip("/")
 
+# Different models can be set for different stages:
+# TRIAGE_MODEL, ACTIONS_MODEL, WRITER_MODEL
+DEFAULT_MODEL = os.getenv("MODEL_NAME", "qwen2.5-coder:1.5b")
+TRIAGE_MODEL = os.getenv("TRIAGE_MODEL", DEFAULT_MODEL)
+ACTIONS_MODEL = os.getenv("ACTIONS_MODEL", DEFAULT_MODEL)
+WRITER_MODEL = os.getenv("WRITER_MODEL", DEFAULT_MODEL)
+
+# CPU-friendly defaults
 NUM_CTX = int(os.getenv("NUM_CTX", "2048"))
 MAX_TOKENS = int(os.getenv("MAX_TOKENS", "512"))
 LITELLM_TIMEOUT = int(os.getenv("LITELLM_TIMEOUT", "3600"))
 
-
+# single = 1 model / 1 request (faster on CPU)
+# multi = 3 stages: triage -> actions -> writer (possibly 3 different models)
+PIPELINE = os.getenv("PIPELINE", "multi").strip().lower()
 
 
-def make_model():
-    # IMPORTANT: fixes "string indices must be integers" on some smolagents/litellm combinations
+def make_model(model_name: str) -> LiteLLMModel:
     return LiteLLMModel(
-        model_id=f"ollama_chat/{MODEL_NAME}",
+        model_id=f"ollama_chat/{model_name}",
         api_base=OLLAMA_BASE,
         num_ctx=NUM_CTX,
         temperature=0.2,
         max_tokens=MAX_TOKENS,
         timeout=LITELLM_TIMEOUT,
-        flatten_messages_as_text=False,
+        flatten_messages_as_text=False,  # important for compatibility
     )
```
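A usage sketch for the new per-stage factory, assuming smolagents' convention that a model instance is called with an OpenAI-style message list and returns a message object exposing `.content`:

```python
# Hypothetical usage of the new factory: one instance per pipeline stage,
# called with an OpenAI-style message list (smolagents convention assumed).
triage_model = make_model("qwen2.5-coder:1.5b")
reply = triage_model([{"role": "user", "content": "ping"}])
print(reply.content)
```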
```diff
@@ -76,33 +84,13 @@ def web_search(query: str) -> str:
     return _strip_html(resp.text)[:4500]
 
 
-def _friendly_error(e: Exception) -> str:
-    return (
-        "### Error\n\n"
-        "The backend received the request, but it failed.\n\n"
-        "Error text:\n"
-        f"```text\n{repr(e)}\n```"
-    )
-
-
-def ping():
-    print("[ping] clicked", flush=True)
-    return "pong ✅ (the button works, the backend responds)"
-
-
 def build_web_context(region: str, allow_internet: bool) -> str:
     if not allow_internet:
         return ""
-
     q1 = f"emergency financial assistance {region}"
     q2 = "how to avoid financial aid scams"
-
-    print(f"[web] search 1: {q1}", flush=True)
     r1 = web_search(q1)
-
-    print(f"[web] search 2: {q2}", flush=True)
     r2 = web_search(q2)
-
     return (
         "## web_search results (internal context)\n"
         f"### Search 1: {q1}\n{r1}\n\n"
```
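`_strip_html` is referenced here but defined outside the captured hunks. A plausible minimal reconstruction, an assumption rather than the commit's actual helper:

```python
import re

# Hypothetical _strip_html: drop <script>/<style> bodies, strip remaining
# tags, and collapse whitespace so web_search returns plain text.
def _strip_html(html: str) -> str:
    text = re.sub(r"(?is)<(script|style)\b.*?</\1\s*>", " ", html)
    text = re.sub(r"<[^>]+>", " ", text)
    return re.sub(r"\s+", " ", text).strip()
```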
```diff
@@ -110,25 +98,35 @@ def build_web_context(region: str, allow_internet: bool) -> str:
     )
 
 
+def _friendly_error(e: Exception) -> str:
+    return (
+        "### Error\n\n"
+        "The request failed.\n\n"
+        "```text\n"
+        f"{repr(e)}\n"
+        "```"
+    )
+
+
 def run_fin_aid(case_description: str, region: str, urgency: str, allow_internet: bool):
-
-    print("[run] button clicked", flush=True)
+    print("[run] clicked", flush=True)
     print(f"[run] region={region!r} urgency={urgency!r} internet={allow_internet}", flush=True)
 
-
-
-
-
-
-
-
-        "- No investment advice. Focus: crisis support, budgeting, assistance options.\n"
-    )
+    system = (
+        "You are a financial navigation assistant. Always answer in RUSSIAN.\n"
+        "Safety rules:\n"
+        "- Do NOT ask for or invite entering sensitive data: card numbers, CVV, passwords, SMS codes, document numbers.\n"
+        "- Do NOT ask for a full passport/ID. Only general categories are allowed: 'identity document' without numbers/series.\n"
+        "- No investment advice. Focus: crisis support, budgeting, assistance options.\n"
+    )
 
+    try:
         web_ctx = build_web_context(region, allow_internet)
         print("[run] web_ctx ready", flush=True)
 
-
+        if PIPELINE != "multi":
+            model = make_model(WRITER_MODEL)
+            prompt = f"""
 Produce a FINISHED report in Markdown.
 
 Structure:
```
```diff
@@ -153,36 +151,104 @@ def run_fin_aid(case_description: str, region: str, urgency: str, allow_internet
 
 {web_ctx}
 """
-
-
-
-
-
-
+            print(f"[run] single -> model={WRITER_MODEL}", flush=True)
+            out = llm_text(model, textwrap.dedent(prompt), system_prompt=system)
+            print("[run] done (single), len=", len(out), flush=True)
+            return gr.update(value=out)
+
+        # --- MULTI: 3 stages, possibly 3 different models ---
+        triage_model = make_model(TRIAGE_MODEL)
+        actions_model = make_model(ACTIONS_MODEL)
+        writer_model = make_model(WRITER_MODEL)
+
+        triage_prompt = f"""
+You are the Triage agent.
+Produce this structure:
+- Brief summary (2–4 sentences)
+- Assumptions (list)
+- Priorities: Today / This week / Within a month
+- Risks (eviction/utility shutoffs/fines/debt collectors/scammers/overload)
+- What data to prepare (a safe list, no document numbers)
+- Clarifying questions (up to 8)
+
+Urgency: {urgency}
+Region: {region}
+Description:
+{case_description}
 
+{web_ctx}
+"""
+        print(f"[run] triage -> model={TRIAGE_MODEL}", flush=True)
+        triage = llm_text(triage_model, textwrap.dedent(triage_prompt), system_prompt=system)
+        print("[run] triage done, len=", len(triage), flush=True)
+
+        actions_prompt = f"""
+You are the Actions agent.
+Based on Triage, produce:
+1) Actions in 3 buckets: urgent / short_term / mid_term (4–8 items each)
+2) Assistance options in the region (resources): government/ngo/debt_counseling/housing/utilities/other
+Format: Markdown (NOT Python structures).
+
+Triage:
+{triage}
+"""
+        print(f"[run] actions -> model={ACTIONS_MODEL}", flush=True)
+        actions = llm_text(actions_model, textwrap.dedent(actions_prompt), system_prompt=system)
+        print("[run] actions done, len=", len(actions), flush=True)
+
+        writer_prompt = f"""
+You are the Writer agent.
+Assemble the final report in Markdown, strictly following this structure:
+
+# Financial aid plan
+## Important
+## Situation summary
+## Priorities
+### Today (24–72 hours)
+### This week
+### Within a month
+## Assistance options in the region
+## Step-by-step plan
+## 30-day mini-budget
+## Anti-fraud
+## What to prepare
+## Clarifying questions (up to 8)
+
+Requirements:
+- Do NOT ask for sensitive data, document/card numbers, or a full passport/ID.
+- Do not paste source code/logs/HTML.
+- Use the content of Triage and Actions.
+
+Triage:
+{triage}
+
+Actions:
+{actions}
+"""
+        print(f"[run] writer -> model={WRITER_MODEL}", flush=True)
+        out = llm_text(writer_model, textwrap.dedent(writer_prompt), system_prompt=system)
+        print("[run] done (multi), len=", len(out), flush=True)
+
+        return gr.update(value=out)
 
     except Exception as e:
         print("[run] ERROR:", repr(e), flush=True)
-        return _friendly_error(e)
+        return gr.update(value=_friendly_error(e))
 
 
 print("[boot] app.py loaded", flush=True)
 
-with gr.Blocks(title="Financial Aid Navigator (CPU debug)") as demo:
-    gr.Markdown("# Financial Aid Navigator (CPU debug)")
+with gr.Blocks(title="Financial Aid Navigator (3 models)") as demo:
+    gr.Markdown("# Financial Aid Navigator (3 models via Ollama)")
     gr.Markdown(
-        f"- Model: `{MODEL_NAME}`\n"
         f"- Ollama: `{OLLAMA_BASE}`\n"
-        f"-
-        f"-
+        f"- PIPELINE: `{PIPELINE}`\n"
+        f"- TRIAGE_MODEL: `{TRIAGE_MODEL}`\n"
+        f"- ACTIONS_MODEL: `{ACTIONS_MODEL}`\n"
+        f"- WRITER_MODEL: `{WRITER_MODEL}`\n"
+        f"- NUM_CTX: `{NUM_CTX}`, MAX_TOKENS: `{MAX_TOKENS}`, TIMEOUT: `{LITELLM_TIMEOUT}`"
     )
 
-    with gr.Row():
-        ping_btn = gr.Button("Ping (click test)")
-        ping_out = gr.Markdown()
-
-    ping_btn.click(fn=ping, inputs=None, outputs=ping_out, queue=False)
-
     region = gr.Textbox(label="Region", lines=1)
     urgency = gr.Dropdown(
         ["urgent (24–72 hours)", "within a week", "not urgent (within a month)"],
@@ -193,7 +259,7 @@ with gr.Blocks(title="Financial Aid Navigator (CPU debug)") as demo:
     allow_internet = gr.Checkbox(label="Allow internet search", value=False)
 
     run_btn = gr.Button("Generate an aid plan")
-    output = gr.Textbox(label="Plan", lines=
+    output = gr.Textbox(label="Plan", lines=24)
 
     run_btn.click(
         fn=run_fin_aid,
@@ -202,9 +268,6 @@ with gr.Blocks(title="Financial Aid Navigator (CPU debug)") as demo:
         queue=False,
     )
 
-    # the queue can be enabled later, once everything works
-    # demo.queue(max_size=20)
-
 def main():
     server_name = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
     server_port = int(os.getenv("GRADIO_SERVER_PORT", "7860"))
```
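`llm_text` is called at every stage above, but its definition falls outside the captured hunks. A hedged sketch of what such a helper typically looks like with smolagents (the commit's real implementation may differ):

```python
from smolagents import LiteLLMModel

# Hypothetical reconstruction of llm_text: wrap the prompt (plus optional
# system prompt) in chat messages and return the model's reply as text.
def llm_text(model: LiteLLMModel, prompt: str, system_prompt: str = "") -> str:
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})
    return model(messages).content
```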
start.sh CHANGED

```diff
@@ -1,11 +1,16 @@
 #!/usr/bin/env bash
 set -euo pipefail
 
-MODEL_NAME="${MODEL_NAME:-qwen2.5-coder:3b}"
 OLLAMA_URL="${OLLAMA_URL:-http://127.0.0.1:11434}"
-export MODEL_NAME
 export OLLAMA_URL
 
+MODEL_NAME="${MODEL_NAME:-qwen2.5-coder:1.5b}"
+TRIAGE_MODEL="${TRIAGE_MODEL:-$MODEL_NAME}"
+ACTIONS_MODEL="${ACTIONS_MODEL:-$MODEL_NAME}"
+WRITER_MODEL="${WRITER_MODEL:-$MODEL_NAME}"
+
+export MODEL_NAME TRIAGE_MODEL ACTIONS_MODEL WRITER_MODEL
+
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$SCRIPT_DIR"
 
@@ -13,7 +18,6 @@ echo "[start.sh] Starting Ollama..."
 ollama serve > /tmp/ollama.log 2>&1 &
 OLLAMA_PID=$!
 
-# Wait until Ollama is ready
 python3 - << 'PY'
 import os, time, urllib.request
 base = os.environ.get("OLLAMA_URL","http://127.0.0.1:11434").rstrip("/")
```
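The body of this readiness check between the lines shown wasn't captured. A sketch of the usual pattern, assumed rather than taken from the commit: poll the server root until `ollama serve` answers, matching the `raise SystemExit("Ollama did not start")` visible in the next hunk:

```python
import os
import time
import urllib.request

base = os.environ.get("OLLAMA_URL", "http://127.0.0.1:11434").rstrip("/")

# Poll the server root until `ollama serve` answers; if all one-second
# attempts fail (~3 minutes assumed here), the for-else aborts the script.
for _ in range(180):
    try:
        urllib.request.urlopen(base + "/", timeout=2).read()
        print("Ollama is ready")
        break
    except Exception:
        time.sleep(1)
else:
    raise SystemExit("Ollama did not start")
```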
```diff
@@ -28,38 +32,55 @@
 raise SystemExit("Ollama did not start")
 PY
 
-
-
-
-
-
-
-fi
+# Deduplicated list of models
+MODELS=("$MODEL_NAME" "$TRIAGE_MODEL" "$ACTIONS_MODEL" "$WRITER_MODEL")
+UNIQ_MODELS=()
+for m in "${MODELS[@]}"; do
+  skip=false
+  for u in "${UNIQ_MODELS[@]}"; do
+    if [ "$u" = "$m" ]; then skip=true; fi
+  done
+  if [ "$skip" = false ]; then UNIQ_MODELS+=("$m"); fi
+done
+
+echo "[start.sh] Ensuring models exist..."
+for m in "${UNIQ_MODELS[@]}"; do
+  echo "[start.sh] model: $m"
+  if ! ollama show "$m" >/dev/null 2>&1; then
+    echo "[start.sh] pulling $m..."
+    ollama pull "$m"
+  else
+    echo "[start.sh] already present"
+  fi
+done
 
-
-echo "[start.sh] Warming up model..."
+echo "[start.sh] Warming up models..."
 python3 - << 'PY'
 import os, json, urllib.request
+
 base = os.environ.get("OLLAMA_URL","http://127.0.0.1:11434").rstrip("/")
-
-
+ctx = int(os.environ.get("OLLAMA_CONTEXT_LENGTH","2048"))
+
+models = []
+for k in ("MODEL_NAME","TRIAGE_MODEL","ACTIONS_MODEL","WRITER_MODEL"):
+    v = os.environ.get(k)
+    if v and v not in models:
+        models.append(v)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-)
-urllib.request.urlopen(req, timeout=1200).read()
-print("Warmup OK")
+for m in models:
+    payload = {
+        "model": m,
+        "prompt": "ping",
+        "stream": False,
+        "options": {"num_ctx": ctx, "num_predict": 16}
+    }
+    req = urllib.request.Request(
+        base + "/api/generate",
+        data=json.dumps(payload).encode(),
+        headers={"Content-Type":"application/json"},
+    )
+    urllib.request.urlopen(req, timeout=1800).read()
+    print("Warmup OK:", m)
 PY
 
 echo "[start.sh] Launching app..."
```
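To confirm the "Ensuring models exist" step did its job, the daemon's pulled models can be listed and compared against the stage variables. A small check against Ollama's standard `/api/tags` endpoint, assuming the default port:

```python
import json
import os
import urllib.request

base = os.environ.get("OLLAMA_URL", "http://127.0.0.1:11434").rstrip("/")

# /api/tags lists every model the daemon has pulled to local storage.
with urllib.request.urlopen(base + "/api/tags", timeout=10) as resp:
    present = {m["name"] for m in json.load(resp).get("models", [])}

for key in ("TRIAGE_MODEL", "ACTIONS_MODEL", "WRITER_MODEL"):
    name = os.environ.get(key, "")
    print(key, name, "OK" if name in present else "MISSING")
```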