Spaces:
Running
Running
Subhajit Chakraborty
committed on
Commit
·
aef0887
1
Parent(s):
a14162a
update(10)
Browse files
src/app.py
CHANGED
|
@@ -275,7 +275,7 @@ if st.session_state.page == "Enrich Companies":
|
|
| 275 |
response = qservice.query()
|
| 276 |
print(response)
|
| 277 |
print("Initial extraction is done. Now cleaning the JSON...")
|
| 278 |
-
with open("data/uncleaned_companies.json", "r") as f:
|
| 279 |
data = json.load(f)
|
| 280 |
|
| 281 |
cleaned_data = clean_json.clean_json_f(data)
|
|
@@ -314,7 +314,7 @@ if st.session_state.page == "Enrich Companies":
|
|
| 314 |
res = lead_scorer.scrape_and_augment(
|
| 315 |
own_comp_info, own_comp_web_url
|
| 316 |
)
|
| 317 |
-
with open("
|
| 318 |
json.dump(res, f, indent=2)
|
| 319 |
|
| 320 |
scored_leads = lead_scorer.score(enhanced_data, res)
|
|
@@ -413,13 +413,13 @@ if st.session_state.page == "IntelliSCORE":
|
|
| 413 |
res = lead_scorer.scrape_and_augment(
|
| 414 |
additional_info, comp_url
|
| 415 |
)
|
| 416 |
-
with open("
|
| 417 |
json.dump(res, f, indent=2)
|
| 418 |
st.success("Scrapping Completed!")
|
| 419 |
if res and "error" not in res:
|
| 420 |
st.session_state.lead_conditions = True
|
| 421 |
|
| 422 |
-
with open("
|
| 423 |
lead_cond = json.load(f)
|
| 424 |
with st.spinner("Scoring the leads..."):
|
| 425 |
scored_leads = lead_scorer.score(leads, lead_cond)
|
|
@@ -430,7 +430,7 @@ if st.session_state.page == "IntelliSCORE":
|
|
| 430 |
|
| 431 |
else:
|
| 432 |
st.text("Skipping url scrapping...")
|
| 433 |
-
with open("
|
| 434 |
lead_cond = json.load(f)
|
| 435 |
with st.spinner("Scoring the leads..."):
|
| 436 |
scored_leads = lead_scorer.score(leads, lead_cond)
|
|
|
|
| 275 |
response = qservice.query()
|
| 276 |
print(response)
|
| 277 |
print("Initial extraction is done. Now cleaning the JSON...")
|
| 278 |
+
with open("/tmp/data/uncleaned_companies.json", "r") as f:
|
| 279 |
data = json.load(f)
|
| 280 |
|
| 281 |
cleaned_data = clean_json.clean_json_f(data)
|
|
|
|
| 314 |
res = lead_scorer.scrape_and_augment(
|
| 315 |
own_comp_info, own_comp_web_url
|
| 316 |
)
|
| 317 |
+
with open(os.path.join(DATA_DIR, "lead_conditions.json"), "w") as f:
|
| 318 |
json.dump(res, f, indent=2)
|
| 319 |
|
| 320 |
scored_leads = lead_scorer.score(enhanced_data, res)
|
|
|
|
| 413 |
res = lead_scorer.scrape_and_augment(
|
| 414 |
additional_info, comp_url
|
| 415 |
)
|
| 416 |
+
with open(os.path.join(DATA_DIR, "lead_conditions.json"), "w") as f:
|
| 417 |
json.dump(res, f, indent=2)
|
| 418 |
st.success("Scrapping Completed!")
|
| 419 |
if res and "error" not in res:
|
| 420 |
st.session_state.lead_conditions = True
|
| 421 |
|
| 422 |
+
with open(os.path.join(DATA_DIR, "lead_conditions.json"), "r") as f:
|
| 423 |
lead_cond = json.load(f)
|
| 424 |
with st.spinner("Scoring the leads..."):
|
| 425 |
scored_leads = lead_scorer.score(leads, lead_cond)
|
|
|
|
| 430 |
|
| 431 |
else:
|
| 432 |
st.text("Skipping url scrapping...")
|
| 433 |
+
with open(os.path.join(DATA_DIR, "lead_conditions.json"), "r") as f:
|
| 434 |
lead_cond = json.load(f)
|
| 435 |
with st.spinner("Scoring the leads..."):
|
| 436 |
scored_leads = lead_scorer.score(leads, lead_cond)
|
src/post_extraction_tools/data_quality_enhancer.py
CHANGED
|
@@ -124,7 +124,7 @@ def enhancer(data: object, embedder) -> list:
|
|
| 124 |
|
| 125 |
|
| 126 |
def add_ind_key(data: list, embedder) -> list:
|
| 127 |
-
with open("
|
| 128 |
key_ind_embs = json.load(f)["industry_embeddings"]
|
| 129 |
for c in data:
|
| 130 |
if "key_industry" not in c:
|
|
|
|
| 124 |
|
| 125 |
|
| 126 |
def add_ind_key(data: list, embedder) -> list:
|
| 127 |
+
with open("/tmp/data/key_industry_embeddings.json", "r") as f:
|
| 128 |
key_ind_embs = json.load(f)["industry_embeddings"]
|
| 129 |
for c in data:
|
| 130 |
if "key_industry" not in c:
|
src/post_extraction_tools/lead_scoring.py
CHANGED
|
@@ -113,8 +113,7 @@ class LeadScoring:
|
|
| 113 |
|
| 114 |
lead['score'] = score
|
| 115 |
# scored_leads.append(lead)
|
| 116 |
-
|
| 117 |
-
data_folder = os.path.join(root_dir, "..", "data")
|
| 118 |
os.makedirs(data_folder, exist_ok=True)
|
| 119 |
|
| 120 |
file_path = os.path.join(data_folder, "all_cleaned_companies.json")
|
|
|
|
| 113 |
|
| 114 |
lead['score'] = score
|
| 115 |
# scored_leads.append(lead)
|
| 116 |
+
data_folder = "/tmp/data"
|
|
|
|
| 117 |
os.makedirs(data_folder, exist_ok=True)
|
| 118 |
|
| 119 |
file_path = os.path.join(data_folder, "all_cleaned_companies.json")
|
src/post_extraction_tools/website_adder.py
CHANGED
|
@@ -160,8 +160,7 @@ def wiki_search_mode(companies):
|
|
| 160 |
else:
|
| 161 |
print("All companies already have website URLs. Skipping wikisearch mode...")
|
| 162 |
print("Saving results...")
|
| 163 |
-
|
| 164 |
-
data_folder = os.path.join(root_dir, "..", "data")
|
| 165 |
os.makedirs(data_folder, exist_ok=True)
|
| 166 |
|
| 167 |
file_path = os.path.join(data_folder, "all_cleaned_companies.json")
|
|
|
|
| 160 |
else:
|
| 161 |
print("All companies already have website URLs. Skipping wikisearch mode...")
|
| 162 |
print("Saving results...")
|
| 163 |
+
data_folder = "/tmp/data"
|
|
|
|
| 164 |
os.makedirs(data_folder, exist_ok=True)
|
| 165 |
|
| 166 |
file_path = os.path.join(data_folder, "all_cleaned_companies.json")
|
src/services/queryService.py
CHANGED
|
@@ -110,7 +110,7 @@ class QService:
|
|
| 110 |
# filename = f"companies_{self.industry_type}_{self.location}.json".replace(" ", "_").lower()
|
| 111 |
filename = "uncleaned_companies.json"
|
| 112 |
try:
|
| 113 |
-
with open(f"data/{filename}", 'w', encoding="utf-8") as f:
|
| 114 |
f.write(json_output)
|
| 115 |
print(f"Data successfully written to {filename}")
|
| 116 |
except Exception as e:
|
|
|
|
| 110 |
# filename = f"companies_{self.industry_type}_{self.location}.json".replace(" ", "_").lower()
|
| 111 |
filename = "uncleaned_companies.json"
|
| 112 |
try:
|
| 113 |
+
with open(f"/tmp/data/{filename}", 'w', encoding="utf-8") as f:
|
| 114 |
f.write(json_output)
|
| 115 |
print(f"Data successfully written to {filename}")
|
| 116 |
except Exception as e:
|