paulcalzada committed on
Commit
184c939
·
1 Parent(s): c15724f

updated for private faiss

Browse files
Files changed (1) hide show
  1. app.py +13 -19
app.py CHANGED
@@ -147,14 +147,13 @@ Generate the complete Verilog code.
147
 
148
  # --------------------------- Space wiring below ---------------------------
149
 
150
- # Where we’ll place the FAISS index on disk after downloading from your private dataset:
151
  CACHE_DIR = Path("/data/faiss_index") # Spaces ephemeral storage
152
  CACHE_DIR.mkdir(parents=True, exist_ok=True)
153
 
154
  # Env vars you’ll set in the Space “Settings → Repository secrets”
155
  HF_TOKEN = os.getenv("HF_TOKEN") # personal access token with read permission
156
- PRIVATE_DATASET_ID = os.getenv("PRIVATE_DATASET_ID") # e.g. "yourname/verilog-faiss-index"
157
- INDEX_SUBDIR = os.getenv("INDEX_SUBDIR", "faiss_index") # optional subdir within the dataset snapshot
158
  EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
159
 
160
  def ensure_index_downloaded() -> Path:
@@ -170,7 +169,7 @@ def ensure_index_downloaded() -> Path:
170
  if not HF_TOKEN:
171
  raise RuntimeError("Missing HF_TOKEN secret. Add it in the Space settings.")
172
  if not PRIVATE_DATASET_ID:
173
- raise RuntimeError("Missing PRIVATE_DATASET_ID secret (e.g., 'user/private-faiss').")
174
 
175
  print(f"[INFO] Downloading private dataset: {PRIVATE_DATASET_ID}")
176
  snapshot_path = snapshot_download(
@@ -178,16 +177,17 @@ def ensure_index_downloaded() -> Path:
178
  repo_type="dataset",
179
  token=HF_TOKEN,
180
  local_dir=str(CACHE_DIR),
181
- local_dir_use_symlinks=False, # safer for FAISS
182
  )
183
- # If your index files live under a folder inside the dataset, move/point to it
 
184
  candidate = Path(snapshot_path) / INDEX_SUBDIR
185
- if candidate.exists():
186
- print(f"[INFO] Found index subdir at {candidate}")
187
  return candidate
188
 
189
- # Otherwise assume snapshot root contains the index
190
- print(f"[WARN] INDEX_SUBDIR='{INDEX_SUBDIR}' not found; using snapshot root.")
191
  return Path(snapshot_path)
192
 
193
  # Keep a lightweight global cache so we don’t reload embeddings on every click
@@ -220,7 +220,6 @@ def run_generation(spec, use_rag, top_k, model_choice, api_key, temperature, top
220
  retrieved_preview = []
221
  if use_rag:
222
  try:
223
- # similarity_search_with_score returns list[(Document, score)]
224
  docs_with_scores = agent.vectorstore.similarity_search_with_score(spec, k=top_k)
225
  for doc, score in docs_with_scores:
226
  src = doc.metadata.get("source_file", doc.metadata.get("module", "unknown"))
@@ -240,10 +239,7 @@ def run_generation(spec, use_rag, top_k, model_choice, api_key, temperature, top
240
  else:
241
  code = agent.generate_baseline(spec, gen_params)
242
 
243
- # Clean presentation
244
- verilog_block = code.strip()
245
- # Show the first few chars of the retrieved examples (for transparency)
246
- return verilog_block, ("\n".join(retrieved_preview) if retrieved_preview else ""), [d[0].page_content for d in docs_with_scores]
247
 
248
 
249
  with gr.Blocks(title="DeepRAG for RTL (Model-Agnostic)") as demo:
@@ -266,8 +262,8 @@ with gr.Blocks(title="DeepRAG for RTL (Model-Agnostic)") as demo:
266
  "gpt-4o",
267
  "gpt-4o-mini",
268
  "gpt-4.1",
269
- "gpt-5", # hypothetical/future-ready
270
- "gpt-5-mini" # hypothetical/future-ready
271
  ],
272
  value="gpt-4o",
273
  label="Model"
@@ -281,7 +277,6 @@ with gr.Blocks(title="DeepRAG for RTL (Model-Agnostic)") as demo:
281
 
282
  run_btn = gr.Button("Generate Verilog", variant="primary")
283
 
284
- # Right side: code output + retrieval transparency
285
  with gr.Column(scale=3):
286
  gr.Markdown("**Output**")
287
  out_code = gr.Code(
@@ -298,7 +293,6 @@ with gr.Blocks(title="DeepRAG for RTL (Model-Agnostic)") as demo:
298
  interactive=False
299
  )
300
  with gr.Tab("Preview of Retrieved Context (raw)"):
301
- # shows the raw text of retrieved docs for transparency (not downloadable)
302
  retrieved_raw = gr.HighlightedText(label="(first K documents)", combine_adjacent=True)
303
 
304
  run_btn.click(
 
147
 
148
  # --------------------------- Space wiring below ---------------------------
149
 
 
150
  CACHE_DIR = Path("/data/faiss_index") # Spaces ephemeral storage
151
  CACHE_DIR.mkdir(parents=True, exist_ok=True)
152
 
153
  # Env vars you’ll set in the Space “Settings → Repository secrets”
154
  HF_TOKEN = os.getenv("HF_TOKEN") # personal access token with read permission
155
+ PRIVATE_DATASET_ID = os.getenv("PRIVATE_DATASET_ID") # e.g. "yourname/VerilogDB_faiss"
156
+ INDEX_SUBDIR = os.getenv("INDEX_SUBDIR", ".") # since your files are at repo root
157
  EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
158
 
159
  def ensure_index_downloaded() -> Path:
 
169
  if not HF_TOKEN:
170
  raise RuntimeError("Missing HF_TOKEN secret. Add it in the Space settings.")
171
  if not PRIVATE_DATASET_ID:
172
+ raise RuntimeError("Missing PRIVATE_DATASET_ID secret (e.g., 'user/VerilogDB_faiss').")
173
 
174
  print(f"[INFO] Downloading private dataset: {PRIVATE_DATASET_ID}")
175
  snapshot_path = snapshot_download(
 
177
  repo_type="dataset",
178
  token=HF_TOKEN,
179
  local_dir=str(CACHE_DIR),
180
+ local_dir_use_symlinks=False,
181
  )
182
+
183
+ # Prefer INDEX_SUBDIR if present
184
  candidate = Path(snapshot_path) / INDEX_SUBDIR
185
+ if candidate.exists() and any(candidate.iterdir()):
186
+ print(f"[INFO] Found index files under {candidate}")
187
  return candidate
188
 
189
+ # Otherwise use snapshot root
190
+ print(f"[WARN] INDEX_SUBDIR='{INDEX_SUBDIR}' not found or empty; using snapshot root.")
191
  return Path(snapshot_path)
192
 
193
  # Keep a lightweight global cache so we don’t reload embeddings on every click
 
220
  retrieved_preview = []
221
  if use_rag:
222
  try:
 
223
  docs_with_scores = agent.vectorstore.similarity_search_with_score(spec, k=top_k)
224
  for doc, score in docs_with_scores:
225
  src = doc.metadata.get("source_file", doc.metadata.get("module", "unknown"))
 
239
  else:
240
  code = agent.generate_baseline(spec, gen_params)
241
 
242
+ return code.strip(), ("\n".join(retrieved_preview) if retrieved_preview else ""), [d[0].page_content for d in docs_with_scores]
 
 
 
243
 
244
 
245
  with gr.Blocks(title="DeepRAG for RTL (Model-Agnostic)") as demo:
 
262
  "gpt-4o",
263
  "gpt-4o-mini",
264
  "gpt-4.1",
265
+ "gpt-5",
266
+ "gpt-5-mini"
267
  ],
268
  value="gpt-4o",
269
  label="Model"
 
277
 
278
  run_btn = gr.Button("Generate Verilog", variant="primary")
279
 
 
280
  with gr.Column(scale=3):
281
  gr.Markdown("**Output**")
282
  out_code = gr.Code(
 
293
  interactive=False
294
  )
295
  with gr.Tab("Preview of Retrieved Context (raw)"):
 
296
  retrieved_raw = gr.HighlightedText(label="(first K documents)", combine_adjacent=True)
297
 
298
  run_btn.click(