bat-6 committed on
Commit
e24894a
·
1 Parent(s): 8463318

feat: implement project synchronization service with database integration and automated embedding updates

Browse files
src/similarity_model/sync_projects.py CHANGED
@@ -2,6 +2,10 @@ import json
2
  import logging
3
  import sys
4
  import pandas as pd
 
 
 
 
5
 
6
  from Data.database.sql_connector import engine
7
  from src.similarity_model.preprocessing import preprocess_dataset
@@ -45,9 +49,9 @@ def sync_projects():
45
  logger.info(f"Loaded {len(projects_df)} active projects from database.")
46
 
47
 
48
- logger.info("Fetching existing records from 'PreProcessed_Projects'...")
49
  with engine.connect() as conn:
50
- existing_df = pd.read_sql("SELECT id FROM PreProcessed_Projects", conn)
51
 
52
  allowed_ids = set(projects_df["Id"].tolist())
53
  processed_ids = set(existing_df["id"].tolist())
@@ -62,9 +66,9 @@ def sync_projects():
62
 
63
  with engine.begin() as conn:
64
  conn.exec_driver_sql(
65
- f"DELETE FROM PreProcessed_Projects WHERE id IN ({ids_str})"
66
  )
67
- logger.info(f"Successfully removed {len(ids_to_remove)} projects from 'PreProcessed_Projects'.")
68
  changed = True
69
 
70
 
@@ -118,7 +122,7 @@ def sync_projects():
118
  logger.info("Uploading preprocessed records to database...")
119
  with engine.begin() as conn:
120
  processed_df.to_sql(
121
- "PreProcessed_Projects",
122
  conn,
123
  if_exists="append",
124
  index=False
@@ -137,3 +141,6 @@ def sync_projects():
137
  logger.info("Local embeddings and index updated successfully.")
138
  else:
139
  logger.info("No database changes detected. Embeddings remain in sync.")
 
 
 
 
2
  import logging
3
  import sys
4
  import pandas as pd
5
+ from pathlib import Path
6
+
7
+ # Ensure workspace root is in path
8
+ sys.path.append(str(Path(__file__).resolve().parents[2]))
9
 
10
  from Data.database.sql_connector import engine
11
  from src.similarity_model.preprocessing import preprocess_dataset
 
49
  logger.info(f"Loaded {len(projects_df)} active projects from database.")
50
 
51
 
52
+ logger.info("Fetching existing records from 'preprocess'...")
53
  with engine.connect() as conn:
54
+ existing_df = pd.read_sql("SELECT id FROM preprocess", conn)
55
 
56
  allowed_ids = set(projects_df["Id"].tolist())
57
  processed_ids = set(existing_df["id"].tolist())
 
66
 
67
  with engine.begin() as conn:
68
  conn.exec_driver_sql(
69
+ f"DELETE FROM preprocess WHERE id IN ({ids_str})"
70
  )
71
+ logger.info(f"Successfully removed {len(ids_to_remove)} projects from 'preprocess'.")
72
  changed = True
73
 
74
 
 
122
  logger.info("Uploading preprocessed records to database...")
123
  with engine.begin() as conn:
124
  processed_df.to_sql(
125
+ "preprocess",
126
  conn,
127
  if_exists="append",
128
  index=False
 
141
  logger.info("Local embeddings and index updated successfully.")
142
  else:
143
  logger.info("No database changes detected. Embeddings remain in sync.")
144
+
145
+ if __name__ == "__main__":
146
+ sync_projects()