Spaces:
Sleeping
Sleeping
Update evaluate.py
Browse files- evaluate.py +28 -3
evaluate.py
CHANGED
|
from pathlib import Path

# --- Imports from the main application ---
# Prefer the real implementations from alz_companion / langchain; if any of
# them is unavailable, fall back to lightweight no-op stubs so this
# evaluation script can still be imported and smoke-tested offline.
try:
    from alz_companion.agent import (
        make_rag_chain, route_query_type, detect_tags_from_query,
        answer_query, call_llm, build_or_load_vectorstore
    )
    from alz_companion.prompts import FAITHFULNESS_JUDGE_PROMPT
    from langchain_community.vectorstores import FAISS
    from langchain.schema import Document
except ImportError:
    # --- START: FALLBACK DEFINITIONS ---
    # Every name imported in the try-block must have a stub here; otherwise a
    # failed import would surface later as a NameError instead of a clean
    # degraded mode.
    class FAISS:
        """Minimal stand-in for langchain's FAISS vector store."""

        def __init__(self):
            # Mimics the real store's ``docstore._dict`` attribute shape.
            self.docstore = type('obj', (object,), {'_dict': {}})()

        def add_documents(self, docs):
            """No-op: fallback store keeps nothing."""
            pass

        def save_local(self, path):
            """No-op: nothing to persist in fallback mode."""
            pass

        @classmethod
        def from_documents(cls, docs, embeddings=None):
            """Return an empty fallback store regardless of input."""
            return cls()

    class Document:
        """Minimal stand-in for ``langchain.schema.Document``."""

        def __init__(self, page_content, metadata=None):
            self.page_content = page_content
            self.metadata = metadata or {}

    def make_rag_chain(*args, **kwargs): return lambda q, **k: {"answer": f"(Eval Fallback) You asked: {q}", "sources": []}
    def route_query_type(q, **kwargs): return "general_conversation"
    def detect_tags_from_query(*args, **kwargs): return {}
    def answer_query(chain, q, **kwargs): return chain(q, **kwargs)
    def call_llm(*args, **kwargs): return "{}"

    def build_or_load_vectorstore(docs, index_path, is_personal=False):
        """Fallback: return an empty stub store instead of building an index."""
        return FAISS()

    FAITHFULNESS_JUDGE_PROMPT = ""
    print("WARNING: Could not import from alz_companion. Evaluation functions will use fallbacks.")
    # --- END: FALLBACK DEFINITIONS ---
| 29 |
# --- LLM-as-a-Judge Prompt for Answer Correctness ---
|
| 30 |
ANSWER_CORRECTNESS_JUDGE_PROMPT = """You are an expert evaluator. Your task is to assess the factual correctness of a generated answer against a ground truth answer.
|
|
|
|
from pathlib import Path

# --- Imports from the main application ---
# Try the real implementations first. The langchain ``Document`` import lives
# inside the try-block alongside FAISS so that a missing langchain install
# triggers the same single fallback path for everything.
try:
    from alz_companion.agent import (
        make_rag_chain, route_query_type, detect_tags_from_query,
        answer_query, call_llm, build_or_load_vectorstore
    )
    from alz_companion.prompts import FAITHFULNESS_JUDGE_PROMPT
    from langchain_community.vectorstores import FAISS
    from langchain.schema import Document
except ImportError:
    # --- START: FALLBACK DEFINITIONS ---
    class FAISS:
        """Do-nothing replacement for langchain's FAISS vector store."""

        def __init__(self):
            # Reproduces the ``docstore._dict`` attribute that callers poke at.
            self.docstore = type('obj', (object,), {'_dict': {}})()

        def add_documents(self, docs):
            """Discard the documents; fallback mode stores nothing."""
            pass

        def save_local(self, path):
            """Skip persistence entirely in fallback mode."""
            pass

        @classmethod
        def from_documents(cls, docs, embeddings=None):
            """Ignore inputs and hand back an empty stub store."""
            return cls()

    class Document:
        """Tiny substitute for ``langchain.schema.Document``.

        Carries the two attributes the evaluation code reads:
        ``page_content`` and ``metadata`` (defaults to an empty dict).
        """

        def __init__(self, page_content, metadata=None):
            self.page_content = page_content
            self.metadata = metadata or {}

    def make_rag_chain(*args, **kwargs): return lambda q, **k: {"answer": f"(Eval Fallback) You asked: {q}", "sources": []}
    def route_query_type(q, **kwargs): return "general_conversation"
    def detect_tags_from_query(*args, **kwargs): return {}
    def answer_query(chain, q, **kwargs): return chain(q, **kwargs)
    def call_llm(*args, **kwargs): return "{}"

    # Fallback for the agent's index builder: never touches disk, always
    # returns an empty stub store.
    def build_or_load_vectorstore(docs, index_path, is_personal=False):
        return FAISS()

    FAITHFULNESS_JUDGE_PROMPT = ""
    print("WARNING: Could not import from alz_companion. Evaluation functions will use fallbacks.")
    # --- END: FALLBACK DEFINITIONS ---
| 54 |
# --- LLM-as-a-Judge Prompt for Answer Correctness ---
|
| 55 |
ANSWER_CORRECTNESS_JUDGE_PROMPT = """You are an expert evaluator. Your task is to assess the factual correctness of a generated answer against a ground truth answer.
|