Subhajit Chakraborty committed on
Commit
aef0887
·
1 Parent(s): a14162a

update(10)

Browse files
src/app.py CHANGED
@@ -275,7 +275,7 @@ if st.session_state.page == "Enrich Companies":
275
  response = qservice.query()
276
  print(response)
277
  print("Initial extraction is done. Now cleaning the JSON...")
278
- with open("data/uncleaned_companies.json", "r") as f:
279
  data = json.load(f)
280
 
281
  cleaned_data = clean_json.clean_json_f(data)
@@ -314,7 +314,7 @@ if st.session_state.page == "Enrich Companies":
314
  res = lead_scorer.scrape_and_augment(
315
  own_comp_info, own_comp_web_url
316
  )
317
- with open("src/data/lead_conditions.json", "w") as f:
318
  json.dump(res, f, indent=2)
319
 
320
  scored_leads = lead_scorer.score(enhanced_data, res)
@@ -413,13 +413,13 @@ if st.session_state.page == "IntelliSCORE":
413
  res = lead_scorer.scrape_and_augment(
414
  additional_info, comp_url
415
  )
416
- with open("src/data/lead_conditions.json", "w") as f:
417
  json.dump(res, f, indent=2)
418
  st.success("Scrapping Completed!")
419
  if res and "error" not in res:
420
  st.session_state.lead_conditions = True
421
 
422
- with open("src/data/lead_conditions.json", "r") as f:
423
  lead_cond = json.load(f)
424
  with st.spinner("Scoring the leads..."):
425
  scored_leads = lead_scorer.score(leads, lead_cond)
@@ -430,7 +430,7 @@ if st.session_state.page == "IntelliSCORE":
430
 
431
  else:
432
  st.text("Skipping url scrapping...")
433
- with open("src/data/lead_conditions.json", "r") as f:
434
  lead_cond = json.load(f)
435
  with st.spinner("Scoring the leads..."):
436
  scored_leads = lead_scorer.score(leads, lead_cond)
 
275
  response = qservice.query()
276
  print(response)
277
  print("Initial extraction is done. Now cleaning the JSON...")
278
+ with open("/tmp/data/uncleaned_companies.json", "r") as f:
279
  data = json.load(f)
280
 
281
  cleaned_data = clean_json.clean_json_f(data)
 
314
  res = lead_scorer.scrape_and_augment(
315
  own_comp_info, own_comp_web_url
316
  )
317
+ with open(os.path.join(DATA_DIR, "lead_conditions.json"), "w") as f:
318
  json.dump(res, f, indent=2)
319
 
320
  scored_leads = lead_scorer.score(enhanced_data, res)
 
413
  res = lead_scorer.scrape_and_augment(
414
  additional_info, comp_url
415
  )
416
+ with open(os.path.join(DATA_DIR, "lead_conditions.json"), "w") as f:
417
  json.dump(res, f, indent=2)
418
  st.success("Scrapping Completed!")
419
  if res and "error" not in res:
420
  st.session_state.lead_conditions = True
421
 
422
+ with open(os.path.join(DATA_DIR, "lead_conditions.json"), "r") as f:
423
  lead_cond = json.load(f)
424
  with st.spinner("Scoring the leads..."):
425
  scored_leads = lead_scorer.score(leads, lead_cond)
 
430
 
431
  else:
432
  st.text("Skipping url scrapping...")
433
+ with open(os.path.join(DATA_DIR, "lead_conditions.json"), "r") as f:
434
  lead_cond = json.load(f)
435
  with st.spinner("Scoring the leads..."):
436
  scored_leads = lead_scorer.score(leads, lead_cond)
src/post_extraction_tools/data_quality_enhancer.py CHANGED
@@ -124,7 +124,7 @@ def enhancer(data: object, embedder) -> list:
124
 
125
 
126
  def add_ind_key(data: list, embedder) -> list:
127
- with open("src/data/key_industry_embeddings.json", "r") as f:
128
  key_ind_embs = json.load(f)["industry_embeddings"]
129
  for c in data:
130
  if "key_industry" not in c:
 
124
 
125
 
126
  def add_ind_key(data: list, embedder) -> list:
127
+ with open("/tmp/data/key_industry_embeddings.json", "r") as f:
128
  key_ind_embs = json.load(f)["industry_embeddings"]
129
  for c in data:
130
  if "key_industry" not in c:
src/post_extraction_tools/lead_scoring.py CHANGED
@@ -113,8 +113,7 @@ class LeadScoring:
113
 
114
  lead['score'] = score
115
  # scored_leads.append(lead)
116
- root_dir = os.path.dirname(os.path.abspath(__file__))
117
- data_folder = os.path.join(root_dir, "..", "data")
118
  os.makedirs(data_folder, exist_ok=True)
119
 
120
  file_path = os.path.join(data_folder, "all_cleaned_companies.json")
 
113
 
114
  lead['score'] = score
115
  # scored_leads.append(lead)
116
+ data_folder = "/tmp/data"
 
117
  os.makedirs(data_folder, exist_ok=True)
118
 
119
  file_path = os.path.join(data_folder, "all_cleaned_companies.json")
src/post_extraction_tools/website_adder.py CHANGED
@@ -160,8 +160,7 @@ def wiki_search_mode(companies):
160
  else:
161
  print("All companies already have website URLs. Skipping wikisearch mode...")
162
  print("Saving results...")
163
- root_dir = os.path.dirname(os.path.abspath(__file__))
164
- data_folder = os.path.join(root_dir, "..", "data")
165
  os.makedirs(data_folder, exist_ok=True)
166
 
167
  file_path = os.path.join(data_folder, "all_cleaned_companies.json")
 
160
  else:
161
  print("All companies already have website URLs. Skipping wikisearch mode...")
162
  print("Saving results...")
163
+ data_folder = "/tmp/data"
 
164
  os.makedirs(data_folder, exist_ok=True)
165
 
166
  file_path = os.path.join(data_folder, "all_cleaned_companies.json")
src/services/queryService.py CHANGED
@@ -110,7 +110,7 @@ class QService:
110
  # filename = f"companies_{self.industry_type}_{self.location}.json".replace(" ", "_").lower()
111
  filename = "uncleaned_companies.json"
112
  try:
113
- with open(f"data/{filename}", 'w', encoding="utf-8") as f:
114
  f.write(json_output)
115
  print(f"Data successfully written to {filename}")
116
  except Exception as e:
 
110
  # filename = f"companies_{self.industry_type}_{self.location}.json".replace(" ", "_").lower()
111
  filename = "uncleaned_companies.json"
112
  try:
113
+ with open(f"/tmp/data/{filename}", 'w', encoding="utf-8") as f:
114
  f.write(json_output)
115
  print(f"Data successfully written to {filename}")
116
  except Exception as e: