fixing bug in evaluation data setup
Browse files
- src/app.py +1 -0
- src/core/eval.py +10 -8
- src/core/ingest.py +2 -2
- src/core/synthetic_data.py +2 -2
src/app.py
CHANGED
|
@@ -225,6 +225,7 @@ def setup_synthetic_data(collections: List[str]):
|
|
| 225 |
docs_length = setup_test_data(collections)
|
| 226 |
return f"✅ Successfully ingested {docs_length} synthetic test data for: {', '.join(collections)}"
|
| 227 |
except Exception as e:
|
|
|
|
| 228 |
return f"β Error setting up test data: {str(e)}"
|
| 229 |
|
| 230 |
|
|
|
|
| 225 |
docs_length = setup_test_data(collections)
|
| 226 |
return f"✅ Successfully ingested {docs_length} synthetic test data for: {', '.join(collections)}"
|
| 227 |
except Exception as e:
|
| 228 |
+
print(f"β Error setting up test data: {str(e)}")
|
| 229 |
return f"β Error setting up test data: {str(e)}"
|
| 230 |
|
| 231 |
|
src/core/eval.py
CHANGED
|
@@ -17,8 +17,9 @@ import numpy as np
|
|
| 17 |
from langchain_core.documents import Document
|
| 18 |
from langchain_openai import OpenAIEmbeddings
|
| 19 |
from dotenv import load_dotenv, find_dotenv
|
| 20 |
-
from .index import MetaData
|
| 21 |
from .retrieval import retrieval, generate
|
|
|
|
| 22 |
from .synthetic_data import SYNTHETIC_DOCUMENTS, EVAL_QUERIES, EvalQuery
|
| 23 |
|
| 24 |
find_dotenv()
|
|
@@ -437,16 +438,17 @@ def setup_test_data(collections: List[str] = None):
|
|
| 437 |
print(f"\nπ Ingesting {len(docs)} documents into '{collection_name}' collection...")
|
| 438 |
documents = []
|
| 439 |
for i, doc_data in enumerate(docs, 1):
|
| 440 |
-
|
| 441 |
metadata = doc_data["metadata"]
|
| 442 |
-
metadata['source_name'] = collection_name+"_eval"
|
| 443 |
doc = Document(page_content=doc_data["content"], metadata=metadata)
|
| 444 |
-
|
|
|
|
|
|
|
| 445 |
|
| 446 |
-
vectorstore = get_vectorstore(collection_name)
|
| 447 |
-
ids = [str(uuid.uuid4()) for _ in range(len(documents))]
|
| 448 |
-
vectorstore.add_documents(documents, ids=ids)
|
| 449 |
-
|
|
|
|
| 450 |
print(f"β Completed '{collection_name}' collection")
|
| 451 |
|
| 452 |
print("\n" + "="*70)
|
|
|
|
| 17 |
from langchain_core.documents import Document
|
| 18 |
from langchain_openai import OpenAIEmbeddings
|
| 19 |
from dotenv import load_dotenv, find_dotenv
|
| 20 |
+
from .index import MetaData
|
| 21 |
from .retrieval import retrieval, generate
|
| 22 |
+
from .ingest import ingest_documents, get_chunks
|
| 23 |
from .synthetic_data import SYNTHETIC_DOCUMENTS, EVAL_QUERIES, EvalQuery
|
| 24 |
|
| 25 |
find_dotenv()
|
|
|
|
| 438 |
print(f"\nπ Ingesting {len(docs)} documents into '{collection_name}' collection...")
|
| 439 |
documents = []
|
| 440 |
for i, doc_data in enumerate(docs, 1):
|
|
|
|
| 441 |
metadata = doc_data["metadata"]
|
|
|
|
| 442 |
doc = Document(page_content=doc_data["content"], metadata=metadata)
|
| 443 |
+
metadata = MetaData(**metadata)
|
| 444 |
+
chunks = get_chunks([doc], metadata)
|
| 445 |
+
documents.extend(chunks)
|
| 446 |
|
| 447 |
+
# vectorstore = get_vectorstore(collection_name)
|
| 448 |
+
# ids = [str(uuid.uuid4()) for _ in range(len(documents))]
|
| 449 |
+
# vectorstore.add_documents(documents, ids=ids)
|
| 450 |
+
ingest_documents(documents, collection_name)
|
| 451 |
+
tot_docs += len(docs)
|
| 452 |
print(f"β Completed '{collection_name}' collection")
|
| 453 |
|
| 454 |
print("\n" + "="*70)
|
src/core/ingest.py
CHANGED
|
@@ -49,8 +49,8 @@ def get_chunks(documents: List[Document], metadata: MetaData):
|
|
| 49 |
metadata={
|
| 50 |
"doc_id": doc_id,
|
| 51 |
"chunk_id": str(uuid.uuid4()),
|
| 52 |
-
"source_name": chunk.metadata
|
| 53 |
-
"start_index": chunk.metadata
|
| 54 |
**metadata.model_dump(),
|
| 55 |
},
|
| 56 |
)
|
|
|
|
| 49 |
metadata={
|
| 50 |
"doc_id": doc_id,
|
| 51 |
"chunk_id": str(uuid.uuid4()),
|
| 52 |
+
"source_name": chunk.metadata.get("source",'Not Available').split("/")[-1],
|
| 53 |
+
"start_index": chunk.metadata.get("start_index",0),
|
| 54 |
**metadata.model_dump(),
|
| 55 |
},
|
| 56 |
)
|
src/core/synthetic_data.py
CHANGED
|
@@ -704,11 +704,11 @@ SYNTHETIC_DOCUMENTS = {
|
|
| 704 |
},
|
| 705 |
{
|
| 706 |
"content": "Auto loan financing is available for new and used vehicle purchases with interest rates varying based on vehicle age, loan term, down payment, credit score, and lender relationship. New car loans currently range from 5.5% to 9.0% APR depending on creditworthiness, while used car loans carry slightly higher rates from 6.5% to 11.0% APR reflecting the additional risk associated with older vehicles. Loan terms typically range from 36 to 84 months, though financial advisors generally recommend shorter terms to avoid owing more than the vehicle's value as depreciation outpaces principal reduction. Longer terms reduce monthly payments but substantially increase total interest costs over the loan life. Down payments of at least 20% for new vehicles and 10% for used vehicles are recommended to establish positive equity immediately and potentially qualify for better rates. Credit unions often offer rates 0.5-1.0% below traditional banks for members in good standing. Dealer financing may provide promotional rates as low as 0% APR for limited periods on select new models, though these offers typically require excellent credit and may limit negotiation on purchase price. Pre-approval from banks or credit unions provides negotiating leverage at dealerships and clarifies budget constraints before shopping. Gap insurance protects against total loss situations where insurance payouts don't cover outstanding loan balances. Extended warranties and ancillary products offered by dealers often carry high markups and should be carefully evaluated. Refinancing existing auto loans becomes attractive when rates drop significantly or credit scores improve substantially after the original purchase. Early payoff typically incurs no penalties allowing aggressive principal reduction strategies. Trade-in vehicles with outstanding loans require payoff before completing new purchases with equity or deficiency rolled into new financing when permitted.",
|
| 707 |
-
"metadata": {"language": "en", "domain": "Finance", "section": "Loans","topic": "Interest Rates", "doc_type": "manual"},
|
| 708 |
},
|
| 709 |
{
|
| 710 |
"content": "Q: How are loan interest rates calculated and what factors influence the rate I'll receive? A: Loan interest rates are determined through a complex evaluation process considering multiple risk factors that help lenders assess the likelihood of full and timely repayment. The starting point is typically the prime rate for many consumer loans or the current treasury yield for mortgages, which reflects general market conditions and Federal Reserve monetary policy. Lenders then add a margin based on your individual credit profile. Your credit score is the single most important factor, with scores above 760 qualifying for the best rates while scores below 620 face substantially higher rates or possible denial. Credit history length, payment history, credit utilization ratios, and recent credit inquiries all contribute to this assessment. Loan-to-value ratio matters significantly for secured loans like mortgages and auto loans, with larger down payments reducing lender risk and often qualifying for better rates. Debt-to-income ratio measures your ability to handle additional payments, with lower ratios under 36% preferred by most lenders. Loan term length affects rates because longer terms expose lenders to more years of potential default risk and interest rate fluctuation, resulting in higher rates for 30-year mortgages compared to 15-year options. Employment stability and income verification demonstrate repayment capacity. Collateral type and quality impact secured loan rates, with new cars receiving better rates than older used vehicles. Finally, your existing relationship with the lender, including deposit accounts and autopay enrollment, may qualify you for rate discounts. Understanding these factors empowers you to improve your rate potential before applying.",
|
| 711 |
-
"metadata": {"language": "en", "domain": "Finance", "section": "Loans","topic": "Interest Rates", "doc_type": "faq"},
|
| 712 |
},
|
| 713 |
# Compliance + Accounts + Security (5 examples)
|
| 714 |
{
|
|
|
|
| 704 |
},
|
| 705 |
{
|
| 706 |
"content": "Auto loan financing is available for new and used vehicle purchases with interest rates varying based on vehicle age, loan term, down payment, credit score, and lender relationship. New car loans currently range from 5.5% to 9.0% APR depending on creditworthiness, while used car loans carry slightly higher rates from 6.5% to 11.0% APR reflecting the additional risk associated with older vehicles. Loan terms typically range from 36 to 84 months, though financial advisors generally recommend shorter terms to avoid owing more than the vehicle's value as depreciation outpaces principal reduction. Longer terms reduce monthly payments but substantially increase total interest costs over the loan life. Down payments of at least 20% for new vehicles and 10% for used vehicles are recommended to establish positive equity immediately and potentially qualify for better rates. Credit unions often offer rates 0.5-1.0% below traditional banks for members in good standing. Dealer financing may provide promotional rates as low as 0% APR for limited periods on select new models, though these offers typically require excellent credit and may limit negotiation on purchase price. Pre-approval from banks or credit unions provides negotiating leverage at dealerships and clarifies budget constraints before shopping. Gap insurance protects against total loss situations where insurance payouts don't cover outstanding loan balances. Extended warranties and ancillary products offered by dealers often carry high markups and should be carefully evaluated. Refinancing existing auto loans becomes attractive when rates drop significantly or credit scores improve substantially after the original purchase. Early payoff typically incurs no penalties allowing aggressive principal reduction strategies. Trade-in vehicles with outstanding loans require payoff before completing new purchases with equity or deficiency rolled into new financing when permitted.",
|
| 707 |
+
"metadata": {"language": "en", "domain": "Finance", "section": "Loans", "topic": "Interest Rates", "doc_type": "manual"},
|
| 708 |
},
|
| 709 |
{
|
| 710 |
"content": "Q: How are loan interest rates calculated and what factors influence the rate I'll receive? A: Loan interest rates are determined through a complex evaluation process considering multiple risk factors that help lenders assess the likelihood of full and timely repayment. The starting point is typically the prime rate for many consumer loans or the current treasury yield for mortgages, which reflects general market conditions and Federal Reserve monetary policy. Lenders then add a margin based on your individual credit profile. Your credit score is the single most important factor, with scores above 760 qualifying for the best rates while scores below 620 face substantially higher rates or possible denial. Credit history length, payment history, credit utilization ratios, and recent credit inquiries all contribute to this assessment. Loan-to-value ratio matters significantly for secured loans like mortgages and auto loans, with larger down payments reducing lender risk and often qualifying for better rates. Debt-to-income ratio measures your ability to handle additional payments, with lower ratios under 36% preferred by most lenders. Loan term length affects rates because longer terms expose lenders to more years of potential default risk and interest rate fluctuation, resulting in higher rates for 30-year mortgages compared to 15-year options. Employment stability and income verification demonstrate repayment capacity. Collateral type and quality impact secured loan rates, with new cars receiving better rates than older used vehicles. Finally, your existing relationship with the lender, including deposit accounts and autopay enrollment, may qualify you for rate discounts. Understanding these factors empowers you to improve your rate potential before applying.",
|
| 711 |
+
"metadata": {"language": "en", "domain": "Finance", "section": "Loans", "topic": "Interest Rates", "doc_type": "faq"},
|
| 712 |
},
|
| 713 |
# Compliance + Accounts + Security (5 examples)
|
| 714 |
{
|