ikram98ai committed on
Commit
0f1a143
·
1 Parent(s): d8dedbc

fixing bug in evaluation data setup

Browse files
src/app.py CHANGED
@@ -225,6 +225,7 @@ def setup_synthetic_data(collections: List[str]):
225
  docs_length = setup_test_data(collections)
226
  return f"✅ Successfully ingested {docs_length} synthetic test data for: {', '.join(collections)}"
227
  except Exception as e:
 
228
  return f"❌ Error setting up test data: {str(e)}"
229
 
230
 
 
225
  docs_length = setup_test_data(collections)
226
  return f"✅ Successfully ingested {docs_length} synthetic test data for: {', '.join(collections)}"
227
  except Exception as e:
228
+ print(f"❌ Error setting up test data: {str(e)}")
229
  return f"❌ Error setting up test data: {str(e)}"
230
 
231
 
src/core/eval.py CHANGED
@@ -17,8 +17,9 @@ import numpy as np
17
  from langchain_core.documents import Document
18
  from langchain_openai import OpenAIEmbeddings
19
  from dotenv import load_dotenv, find_dotenv
20
- from .index import MetaData, get_vectorstore
21
  from .retrieval import retrieval, generate
 
22
  from .synthetic_data import SYNTHETIC_DOCUMENTS, EVAL_QUERIES, EvalQuery
23
 
24
  find_dotenv()
@@ -437,16 +438,17 @@ def setup_test_data(collections: List[str] = None):
437
  print(f"\n📚 Ingesting {len(docs)} documents into '{collection_name}' collection...")
438
  documents = []
439
  for i, doc_data in enumerate(docs, 1):
440
-
441
  metadata = doc_data["metadata"]
442
- metadata['source_name'] = collection_name+"_eval"
443
  doc = Document(page_content=doc_data["content"], metadata=metadata)
444
- documents.append(doc)
 
 
445
 
446
- vectorstore = get_vectorstore(collection_name)
447
- ids = [str(uuid.uuid4()) for _ in range(len(documents))]
448
- vectorstore.add_documents(documents, ids=ids)
449
- tot_docs += len(documents)
 
450
  print(f"✓ Completed '{collection_name}' collection")
451
 
452
  print("\n" + "="*70)
 
17
  from langchain_core.documents import Document
18
  from langchain_openai import OpenAIEmbeddings
19
  from dotenv import load_dotenv, find_dotenv
20
+ from .index import MetaData
21
  from .retrieval import retrieval, generate
22
+ from .ingest import ingest_documents, get_chunks
23
  from .synthetic_data import SYNTHETIC_DOCUMENTS, EVAL_QUERIES, EvalQuery
24
 
25
  find_dotenv()
 
438
  print(f"\n📚 Ingesting {len(docs)} documents into '{collection_name}' collection...")
439
  documents = []
440
  for i, doc_data in enumerate(docs, 1):
 
441
  metadata = doc_data["metadata"]
 
442
  doc = Document(page_content=doc_data["content"], metadata=metadata)
443
+ metadata = MetaData(**metadata)
444
+ chunks = get_chunks([doc], metadata)
445
+ documents.extend(chunks)
446
 
447
+ # vectorstore = get_vectorstore(collection_name)
448
+ # ids = [str(uuid.uuid4()) for _ in range(len(documents))]
449
+ # vectorstore.add_documents(documents, ids=ids)
450
+ ingest_documents(documents, collection_name)
451
+ tot_docs += len(docs)
452
  print(f"✓ Completed '{collection_name}' collection")
453
 
454
  print("\n" + "="*70)
src/core/ingest.py CHANGED
@@ -49,8 +49,8 @@ def get_chunks(documents: List[Document], metadata: MetaData):
49
  metadata={
50
  "doc_id": doc_id,
51
  "chunk_id": str(uuid.uuid4()),
52
- "source_name": chunk.metadata["source"].split("/")[-1],
53
- "start_index": chunk.metadata["start_index"],
54
  **metadata.model_dump(),
55
  },
56
  )
 
49
  metadata={
50
  "doc_id": doc_id,
51
  "chunk_id": str(uuid.uuid4()),
52
+ "source_name": chunk.metadata.get("source",'Not Available').split("/")[-1],
53
+ "start_index": chunk.metadata.get("start_index",0),
54
  **metadata.model_dump(),
55
  },
56
  )
src/core/synthetic_data.py CHANGED
@@ -704,11 +704,11 @@ SYNTHETIC_DOCUMENTS = {
704
  },
705
  {
706
  "content": "Auto loan financing is available for new and used vehicle purchases with interest rates varying based on vehicle age, loan term, down payment, credit score, and lender relationship. New car loans currently range from 5.5% to 9.0% APR depending on creditworthiness, while used car loans carry slightly higher rates from 6.5% to 11.0% APR reflecting the additional risk associated with older vehicles. Loan terms typically range from 36 to 84 months, though financial advisors generally recommend shorter terms to avoid owing more than the vehicle's value as depreciation outpaces principal reduction. Longer terms reduce monthly payments but substantially increase total interest costs over the loan life. Down payments of at least 20% for new vehicles and 10% for used vehicles are recommended to establish positive equity immediately and potentially qualify for better rates. Credit unions often offer rates 0.5-1.0% below traditional banks for members in good standing. Dealer financing may provide promotional rates as low as 0% APR for limited periods on select new models, though these offers typically require excellent credit and may limit negotiation on purchase price. Pre-approval from banks or credit unions provides negotiating leverage at dealerships and clarifies budget constraints before shopping. Gap insurance protects against total loss situations where insurance payouts don't cover outstanding loan balances. Extended warranties and ancillary products offered by dealers often carry high markups and should be carefully evaluated. Refinancing existing auto loans becomes attractive when rates drop significantly or credit scores improve substantially after the original purchase. Early payoff typically incurs no penalties allowing aggressive principal reduction strategies. Trade-in vehicles with outstanding loans require payoff before completing new purchases with equity or deficiency rolled into new financing when permitted.",
707
- "metadata": {"language": "en", "domain": "Finance", "section": "Loans","topic": "Interest Rates", "doc_type": "manual"},
708
  },
709
  {
710
  "content": "Q: How are loan interest rates calculated and what factors influence the rate I'll receive? A: Loan interest rates are determined through a complex evaluation process considering multiple risk factors that help lenders assess the likelihood of full and timely repayment. The starting point is typically the prime rate for many consumer loans or the current treasury yield for mortgages, which reflects general market conditions and Federal Reserve monetary policy. Lenders then add a margin based on your individual credit profile. Your credit score is the single most important factor, with scores above 760 qualifying for the best rates while scores below 620 face substantially higher rates or possible denial. Credit history length, payment history, credit utilization ratios, and recent credit inquiries all contribute to this assessment. Loan-to-value ratio matters significantly for secured loans like mortgages and auto loans, with larger down payments reducing lender risk and often qualifying for better rates. Debt-to-income ratio measures your ability to handle additional payments, with lower ratios under 36% preferred by most lenders. Loan term length affects rates because longer terms expose lenders to more years of potential default risk and interest rate fluctuation, resulting in higher rates for 30-year mortgages compared to 15-year options. Employment stability and income verification demonstrate repayment capacity. Collateral type and quality impact secured loan rates, with new cars receiving better rates than older used vehicles. Finally, your existing relationship with the lender, including deposit accounts and autopay enrollment, may qualify you for rate discounts. Understanding these factors empowers you to improve your rate potential before applying.",
711
- "metadata": {"language": "en", "domain": "Finance", "section": "Loans","topic": "Interest Rates", "doc_type": "faq"},
712
  },
713
  # Compliance + Accounts + Security (5 examples)
714
  {
 
704
  },
705
  {
706
  "content": "Auto loan financing is available for new and used vehicle purchases with interest rates varying based on vehicle age, loan term, down payment, credit score, and lender relationship. New car loans currently range from 5.5% to 9.0% APR depending on creditworthiness, while used car loans carry slightly higher rates from 6.5% to 11.0% APR reflecting the additional risk associated with older vehicles. Loan terms typically range from 36 to 84 months, though financial advisors generally recommend shorter terms to avoid owing more than the vehicle's value as depreciation outpaces principal reduction. Longer terms reduce monthly payments but substantially increase total interest costs over the loan life. Down payments of at least 20% for new vehicles and 10% for used vehicles are recommended to establish positive equity immediately and potentially qualify for better rates. Credit unions often offer rates 0.5-1.0% below traditional banks for members in good standing. Dealer financing may provide promotional rates as low as 0% APR for limited periods on select new models, though these offers typically require excellent credit and may limit negotiation on purchase price. Pre-approval from banks or credit unions provides negotiating leverage at dealerships and clarifies budget constraints before shopping. Gap insurance protects against total loss situations where insurance payouts don't cover outstanding loan balances. Extended warranties and ancillary products offered by dealers often carry high markups and should be carefully evaluated. Refinancing existing auto loans becomes attractive when rates drop significantly or credit scores improve substantially after the original purchase. Early payoff typically incurs no penalties allowing aggressive principal reduction strategies. Trade-in vehicles with outstanding loans require payoff before completing new purchases with equity or deficiency rolled into new financing when permitted.",
707
+ "metadata": {"language": "en", "domain": "Finance", "section": "Loans", "topic": "Interest Rates", "doc_type": "manual"},
708
  },
709
  {
710
  "content": "Q: How are loan interest rates calculated and what factors influence the rate I'll receive? A: Loan interest rates are determined through a complex evaluation process considering multiple risk factors that help lenders assess the likelihood of full and timely repayment. The starting point is typically the prime rate for many consumer loans or the current treasury yield for mortgages, which reflects general market conditions and Federal Reserve monetary policy. Lenders then add a margin based on your individual credit profile. Your credit score is the single most important factor, with scores above 760 qualifying for the best rates while scores below 620 face substantially higher rates or possible denial. Credit history length, payment history, credit utilization ratios, and recent credit inquiries all contribute to this assessment. Loan-to-value ratio matters significantly for secured loans like mortgages and auto loans, with larger down payments reducing lender risk and often qualifying for better rates. Debt-to-income ratio measures your ability to handle additional payments, with lower ratios under 36% preferred by most lenders. Loan term length affects rates because longer terms expose lenders to more years of potential default risk and interest rate fluctuation, resulting in higher rates for 30-year mortgages compared to 15-year options. Employment stability and income verification demonstrate repayment capacity. Collateral type and quality impact secured loan rates, with new cars receiving better rates than older used vehicles. Finally, your existing relationship with the lender, including deposit accounts and autopay enrollment, may qualify you for rate discounts. Understanding these factors empowers you to improve your rate potential before applying.",
711
+ "metadata": {"language": "en", "domain": "Finance", "section": "Loans", "topic": "Interest Rates", "doc_type": "faq"},
712
  },
713
  # Compliance + Accounts + Security (5 examples)
714
  {