antonypamo committed on
Commit
d0c27d9
verified
1 Parent(s): eecda3c

Patch: .dockerignore, disable Dockerfile, add remote dataset loader

Browse files
Files changed (2) hide show
  1. .dockerignore +1 -2
  2. app.py +22 -0
.dockerignore CHANGED
@@ -1,4 +1,4 @@
1
- # datasets / binaries / artifacts
2
  data/**
3
  savant_rrf1/**
4
  **/*.jsonl
@@ -23,7 +23,6 @@ savant_rrf1/**
23
  # caches & venvs
24
  __pycache__/
25
  *.pyc
26
- *.pyo
27
  .venv/
28
  .env
29
  .cache/
 
1
+ # Keep image tiny: exclude datasets, models, artifacts, logs, media
2
  data/**
3
  savant_rrf1/**
4
  **/*.jsonl
 
23
  # caches & venvs
24
  __pycache__/
25
  *.pyc
 
26
  .venv/
27
  .env
28
  .cache/
app.py CHANGED
@@ -139,3 +139,25 @@ with gr.Blocks() as demo:
139
 
140
  # En Spaces no se usa share=True
141
  demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  # En Spaces no se usa share=True
141
  demo.launch()
142
+
143
+
144
+ # ---- Remote dataset fallback (auto-injected) ----
145
+ import os
146
def _try_remote_sample(repo_id: str, split: str = "train", fields=("prompt","completion"), limit=2000):
    """Collect up to ``limit`` text snippets from a remote dataset, best-effort.

    The dataset is loaded with ``datasets.load_dataset(repo_id, split=split)``
    and iterated row by row:

    * If a row contains every key in ``fields``, one snippet is emitted with
      a ``"<key>: <value>"`` line per field, in ``fields`` order (for the
      default fields: ``"prompt: ...\\ncompletion: ..."``).
    * Otherwise the first of ``"text"``, ``"content"``, ``"body"`` that is
      present with a ``str`` value is emitted verbatim.
    * Rows matching neither shape are skipped.

    Args:
        repo_id: Dataset identifier passed to ``load_dataset``.
        split: Dataset split to load.
        fields: Keys that must all be present for the structured format.
        limit: Maximum number of snippets to return; values <= 0 yield ``[]``.

    Returns:
        A list of strings. An empty list is returned when nothing matches or
        when anything fails (``datasets`` not installed, load error, bad
        arguments); the failure is logged instead of raised so callers can
        treat this purely as an optional fallback.
    """
    import logging

    try:
        # Checked inside the try so a non-numeric limit degrades to [] as well.
        if limit <= 0:
            return []

        # Imported lazily: the dependency is optional for the rest of the app.
        from datasets import load_dataset

        ds = load_dataset(repo_id, split=split)
        texts = []
        for row in ds:
            if fields and all(k in row for k in fields):
                # Build the snippet from the requested fields rather than
                # hard-coded keys, so custom ``fields`` work as advertised.
                texts.append("\n".join(f"{k}: {row[k]}" for k in fields))
            else:
                for k in ("text", "content", "body"):
                    if k in row and isinstance(row[k], str):
                        texts.append(row[k])
                        break
            if len(texts) >= limit:
                break
        return texts
    except Exception:
        # Deliberate best-effort boundary: never let the optional remote
        # fallback crash the app, but leave a trace of what went wrong.
        logging.getLogger(__name__).warning(
            "Remote dataset sample failed for %r (split=%r)",
            repo_id,
            split,
            exc_info=True,
        )
        return []
162
+ USE_REMOTE = os.getenv("SAVANT_REMOTE_DATASET","").strip()
163
+