codingwithadi committed on
Commit
be820ad
·
verified ·
1 Parent(s): 81598c5

Upload scripts/ingest.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. scripts/ingest.py +16 -9
scripts/ingest.py CHANGED
@@ -11,6 +11,8 @@ Usage:
11
  C:\\Python313\\python scripts/ingest.py
12
  C:\\Python313\\python scripts/ingest.py --provider azure
13
  C:\\Python313\\python scripts/ingest.py --fresh-raindrop (also pulls live from Raindrop API)
 
 
14
  """
15
 
16
  import sys
@@ -59,8 +61,9 @@ def build_similar_to_edges(items: list[dict], embedder, top_k: int = 5):
59
 
60
  def main():
61
  parser = argparse.ArgumentParser(description="OpenMark Ingest Pipeline")
62
- parser.add_argument("--provider", default=None, help="Embedding provider: local or azure")
63
  parser.add_argument("--fresh-raindrop", action="store_true", help="Also pull fresh from Raindrop API")
 
64
  parser.add_argument("--skip-similar", action="store_true", help="Skip SIMILAR_TO edge computation")
65
  args = parser.parse_args()
66
 
@@ -84,20 +87,24 @@ def main():
84
  print("\n[3/4] Ingesting into ChromaDB...")
85
  chroma_store.ingest(items, embedder)
86
 
87
- # Step 4: Neo4j
88
- print("\n[4/4] Ingesting into Neo4j...")
89
- neo4j_store.ingest(items)
 
90
 
91
- # Step 5: SIMILAR_TO edges
92
- if not args.skip_similar:
93
- build_similar_to_edges(items, embedder, top_k=5)
 
 
94
 
95
  print("\n" + "=" * 60)
96
  print("INGEST COMPLETE")
97
  chroma = chroma_store.get_stats()
98
- neo4j = neo4j_store.get_stats()
99
  print(f" ChromaDB: {chroma.get('total', 0)} vectors")
100
- print(f" Neo4j: {neo4j.get('bookmarks', 0)} bookmarks, {neo4j.get('tags', 0)} tags")
 
 
101
  print("=" * 60)
102
  print("\nNow run: C:\\Python313\\python scripts/search.py \"your query\"")
103
  print(" or: C:\\Python313\\python -m openmark.ui.app")
 
11
  C:\\Python313\\python scripts/ingest.py
12
  C:\\Python313\\python scripts/ingest.py --provider azure
13
  C:\\Python313\\python scripts/ingest.py --fresh-raindrop (also pulls live from Raindrop API)
14
+ C:\\Python313\\python scripts/ingest.py --skip-neo4j (ChromaDB only, no Neo4j required)
15
+ C:\\Python313\\python scripts/ingest.py --skip-similar (skip SIMILAR_TO edge computation)
16
  """
17
 
18
  import sys
 
61
 
62
  def main():
63
  parser = argparse.ArgumentParser(description="OpenMark Ingest Pipeline")
64
+ parser.add_argument("--provider", default=None, help="Embedding provider: local or azure")
65
  parser.add_argument("--fresh-raindrop", action="store_true", help="Also pull fresh from Raindrop API")
66
+ parser.add_argument("--skip-neo4j", action="store_true", help="Skip Neo4j entirely (ChromaDB only)")
67
  parser.add_argument("--skip-similar", action="store_true", help="Skip SIMILAR_TO edge computation")
68
  args = parser.parse_args()
69
 
 
87
  print("\n[3/4] Ingesting into ChromaDB...")
88
  chroma_store.ingest(items, embedder)
89
 
90
+ # Step 4: Neo4j (optional)
91
+ if not args.skip_neo4j:
92
+ print("\n[4/4] Ingesting into Neo4j...")
93
+ neo4j_store.ingest(items)
94
 
95
+ # Step 5: SIMILAR_TO edges
96
+ if not args.skip_similar:
97
+ build_similar_to_edges(items, embedder, top_k=5)
98
+ else:
99
+ print("\n[4/4] Neo4j skipped.")
100
 
101
  print("\n" + "=" * 60)
102
  print("INGEST COMPLETE")
103
  chroma = chroma_store.get_stats()
 
104
  print(f" ChromaDB: {chroma.get('total', 0)} vectors")
105
+ if not args.skip_neo4j:
106
+ neo4j = neo4j_store.get_stats()
107
+ print(f" Neo4j: {neo4j.get('bookmarks', 0)} bookmarks, {neo4j.get('tags', 0)} tags")
108
  print("=" * 60)
109
  print("\nNow run: C:\\Python313\\python scripts/search.py \"your query\"")
110
  print(" or: C:\\Python313\\python -m openmark.ui.app")