Spaces:

PhoenixDecim
/

slm_financial_rag

Running

App Files Files Community

PhoenixDecim committed on Mar 15, 2025

Commit

18d1c8f

1 Parent(s): 2100725

Added penalty for reasoning and future prediction questions

Browse files

Files changed (2) hide show

app.py +42 -7
data_filters.py +4 -0

app.py CHANGED Viewed

@@ -24,6 +24,7 @@ from data_filters import (
     FINANCIAL_ENTITY_LABELS,
     GENERAL_KNOWLEDGE_PATTERNS,
     sensitive_terms,
     FINANCIAL_TERMS,
 )
@@ -266,6 +267,15 @@ def is_general_knowledge_query(query):
     return False
 def is_irrelevant_query(query):
     """Check if the query is not finance related"""
     # If the query is general knowledge and not finance-related
@@ -365,6 +375,20 @@ def compute_entropy(logits):
     return entropy.mean().item()
 # A confidence score is computed using FAISS and BM25 ranking
 # FAISS: The similarity scores between the response and the retrieved chunks are normalized
 # BM25: The BM25 scores for the combined query and response tokens are normalized
@@ -375,12 +399,14 @@ def compute_response_confidence(
     response,
     retrieved_chunks,
     bm25,
-    model_conf_signal=0.5,
-    lambda_faiss=0.4,
-    lambda_conf=0.4,
-    lambda_bm25=1.8,
 ):
-    """Calculates a confidence score using FAISS, BM25, top token probabilites and entropy score"""
     if not retrieved_chunks:
         return 0.0
     # Compute FAISS similarity
@@ -406,15 +432,24 @@ def compute_response_confidence(
         normalized_bm25 = max(0, min(1, normalized_bm25))
     else:
         normalized_bm25 = 0.0
     logger.info(
-        f"Faiss score: {normalized_faiss}, bm25: {normalized_bm25}, "
-        f"Mean Top Token + 1-Entropy Avg: {model_conf_signal}"
     )
     # Weighted sum of all the normalized scores
     confidence_score = (
         lambda_faiss * normalized_faiss
         + model_conf_signal * lambda_conf
         + lambda_bm25 * normalized_bm25
     )
     return round(min(100, max(0, confidence_score.item() * 100)), 2)

     FINANCIAL_ENTITY_LABELS,
     GENERAL_KNOWLEDGE_PATTERNS,
     sensitive_terms,
+    EXPLANATORY_PATTERNS,
     FINANCIAL_TERMS,
 )
     return False
+def get_latest_available_year(retrieved_chunks):
+    """Extracts the latest available year from retrieved financial data"""
+    years = set()  # distinct year values collected across all chunks
+    year_pattern = r"\b(20\d{2})\b"  # a word-bounded four-digit number in the range 2000-2099
+    for chunk in retrieved_chunks:
+        years.update(map(int, re.findall(year_pattern, chunk)))  # NOTE(review): any standalone 20xx number matches, not only dates; assumes each chunk is a str - confirm
+    return max(years) if years else 2024  # falls back to the hard-coded year 2024 when no chunk contains a match
 def is_irrelevant_query(query):
     """Check if the query is not finance related"""
     # If the query is general knowledge and not finance-related
     return entropy.mean().item()
+def contains_future_year(query, retrieved_chunks):
+    """Detects if the query asks for future data beyond available reports"""
+    latest_year = get_latest_available_year(retrieved_chunks)  # newest year found in the chunks, or that helper's fallback value
+    # Extract years from query
+    future_years = set(map(int, re.findall(r"\b(20\d{2})\b", query)))  # despite the name, holds every 20xx year in the query; filtering happens below
+    return any(year > latest_year for year in future_years)  # strict comparison: a query about latest_year itself is not treated as future
+def is_explanatory_query(query):
+    """Checks if the query requires an explanation rather than factual data"""
+    query_lower = query.lower()  # the patterns are written in lowercase, so lowering the query makes matching case-insensitive
+    return any(re.search(pattern, query_lower) for pattern in EXPLANATORY_PATTERNS)  # True if any pattern matches anywhere in the query
 # A confidence score is computed using FAISS and BM25 ranking
 # FAISS: The similarity scores between the response and the retrieved chunks are normalized
 # BM25: The BM25 scores for the combined query and response tokens are normalized
     response,
     retrieved_chunks,
     bm25,
+    model_conf_signal,
+    lambda_faiss=0.6,
+    lambda_conf=0.3,
+    lambda_bm25=1.0,
+    future_penalty=-0.3,
+    explanation_penalty=-0.2,
 ):
+    """Calculates a confidence score for the model response"""
     if not retrieved_chunks:
         return 0.0
     # Compute FAISS similarity
         normalized_bm25 = max(0, min(1, normalized_bm25))
     else:
         normalized_bm25 = 0.0
+    # Penalize if query contains future years
+    future_penalty = -0.3 if contains_future_year(query, retrieved_chunks) else 0.0
+    # Penalize if query is reasoning based
+    explanation_penalty_value = (
+        explanation_penalty if is_explanatory_query(query) else 0.0
+    )
     logger.info(
+        f"Faiss score: {normalized_faiss}, BM25: {normalized_bm25}\n"
+        f"Mean Top Token + 1-Entropy Avg: {model_conf_signal}\n"
+        f"Future penalty: {future_penalty}, Reasoning penalty: {explanation_penalty_value}"
     )
     # Weighted sum of all the normalized scores
     confidence_score = (
         lambda_faiss * normalized_faiss
         + model_conf_signal * lambda_conf
         + lambda_bm25 * normalized_bm25
+        + future_penalty
+        + explanation_penalty_value
     )
     return round(min(100, max(0, confidence_score.item() * 100)), 2)

data_filters.py CHANGED Viewed

@@ -48,6 +48,10 @@ sensitive_terms = {
     "wages",
 }
 FINANCIAL_DATA_PATTERNS = (
     r"\b(\₹?\s?\d{1,3}(?:,\d{2,3})*(?:\.\d+)?\s*(million|billion|crore|lakh|%)"

     "wages",
 }
+EXPLANATORY_PATTERNS = [  # regexes searched against the lowercased query by is_explanatory_query in app.py
+    r"\b(why|reason|cause|explanation|due to|because|factor|impact of|effect of|influence of|driven by)\b",  # causal keywords; word-bounded, so inflected forms such as "reasons" do not match
+    r"\b(how did|what led to|what caused|why did|how was|contributing factor|explain)\b",  # question phrasings that ask for a cause or an explanation
+]
 FINANCIAL_DATA_PATTERNS = (
     r"\b(\₹?\s?\d{1,3}(?:,\d{2,3})*(?:\.\d+)?\s*(million|billion|crore|lakh|%)"