audit_assistant

Sleeping

App Files Files Community

mtyrrell committed on Feb 12, 2025

Commit

c997974

1 Parent(s): a1e5650

feedback functionality completed (using some test elements)

Browse files

Files changed (15) hide show

.gitignore +7 -1
app.py +162 -54
auditqa/__pycache__/__init__.cpython-310.pyc +0 -0
auditqa/__pycache__/process_chunks.cpython-310.pyc +0 -0
auditqa/__pycache__/reader.cpython-310.pyc +0 -0
auditqa/__pycache__/reports.cpython-310.pyc +0 -0
auditqa/__pycache__/retriever.cpython-310.pyc +0 -0
auditqa/__pycache__/sample_questions.cpython-310.pyc +0 -0
auditqa/__pycache__/utils.cpython-310.pyc +0 -0
auditqa/process_chunks.py +15 -6
auditqa/reader.py +38 -27
auditqa/retriever.py +12 -1
auditqa/utils.py +6 -2
model_params.cfg +2 -2
style.css +12 -1

.gitignore CHANGED Viewed

@@ -1,2 +1,8 @@
 .DS_store
-/testing/

 .DS_store
+.env
+/testing/
+/logs/
+logging_config.py
+/data/
+app_interactions.jsonl
+auditqa/__pycache__/

app.py CHANGED Viewed

@@ -14,25 +14,57 @@ from auditqa.retriever import get_context
 from auditqa.reader import nvidia_client, dedicated_endpoint
 from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template
 from dotenv import load_dotenv
 load_dotenv()
-# fetch tokens and model config params
-SPACES_LOG = os.environ["SPACES_LOG"]
 model_config = getconfig("model_params.cfg")
-# create the local logs repo
-JSON_DATASET_DIR = Path("json_dataset")
-JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
-JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"
 # the logs are written to dataset repo periodically from local logs
 # https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
-scheduler = CommitScheduler(
-     repo_id="GIZ/spaces_logs",
-     repo_type="dataset",
-     folder_path=JSON_DATASET_DIR,
-     path_in_repo="audit_chatbot",
-     token=SPACES_LOG )
 #####--------------- VECTOR STORE -------------------------------------------------
 # reports contain the already created chunks from Markdown version of pdf reports
@@ -40,7 +72,7 @@ scheduler = CommitScheduler(
 # We need to create the local vectorstore collection once using load_chunks
 # vectorestore colection are stored on persistent storage so this needs to be run only once
 # hence, comment out line below when creating for first time
-#vectorstores = load_new_chunks()
 # once the vectore embeddings  are created we will use qdrant client to access these
 vectorstores = get_local_qdrant()
@@ -53,6 +85,20 @@ def start_chat(query,history):
 def finish_chat():
     return (gr.update(interactive = True,value = ""))
 async def chat(query,history,sources,reports,subtype,year):
     """taking a query and a message history, use a pipeline (reformulation, retriever, answering)
        to yield a tuple of:(messages in gradio format/messages in langchain format, source documents)
@@ -71,6 +117,7 @@ async def chat(query,history,sources,reports,subtype,year):
     vectorstore = vectorstores["allreports"]
     ##------------------------------get context----------------------------------------------
     context_retrieved = get_context(vectorstore=vectorstore,query=query,reports=reports,
                                                 sources=sources,subtype=subtype,year=year)
     context_retrieved_formatted = "||".join(doc.page_content for doc in context_retrieved)
@@ -111,6 +158,23 @@ async def chat(query,history,sources,reports,subtype,year):
     ##-----------------------get answer from endpoints------------------------------
     answer_yet = ""
     if model_config.get('reader','TYPE') == 'NVIDIA':
         chat_model = nvidia_client()
         async def process_stream():
@@ -130,49 +194,53 @@ async def chat(query,history,sources,reports,subtype,year):
                     answer_yet += token
                     parsed_answer = parse_output_llm_with_sources(answer_yet)
                     history[-1] = (query, parsed_answer)
-                yield [tuple(x) for x in history], docs_html
         # Stream the response updates
         async for update in process_stream():
             yield update
     else:
-        chat_model = dedicated_endpoint()
         async def process_stream():
-        # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(),
-        # instead of modifying the one from the outer scope.
-            nonlocal answer_yet # Use the outer scope's answer_yet variable
-            # Iterate over the streaming response chunks
-            async for chunk in chat_model.astream(messages):
-                token = chunk.content
-                answer_yet += token
-                parsed_answer = parse_output_llm_with_sources(answer_yet)
-                history[-1] = (query, parsed_answer)
-                yield [tuple(x) for x in history], docs_html
-        # Stream the response updates
         async for update in process_stream():
             yield update
-    # logging the event
     try:
-        timestamp = str(datetime.now().timestamp())
-        logs = {
-                "system_prompt": SYSTEM_PROMPT,
-                "sources":sources,
-                "reports":reports,
-                "subtype":subtype,
-                "year":year,
-                "question":query,
-                "sources":sources,
-                "retriever":model_config.get('retriever','MODEL'),
-                "endpoint_type":model_config.get('reader','TYPE'),
-                "raeder":model_config.get('reader','NVIDIA_MODEL'),
-                "docs":[doc.page_content for doc in context_retrieved],
-                "answer": history[-1][1],
-                "time": timestamp,
-            }
-        save_logs(scheduler,JSON_DATASET_PATH,logs)
     except Exception as e:
         logging.error(e)
@@ -378,21 +446,61 @@ with gr.Blocks(title="Audit Q&A", css= "style.css", theme=theme,elem_id = "main-
-    # using event listeners for 1. query box 2. click on example question
     # https://www.gradio.app/docs/gradio/textbox#event-listeners-arguments
     (textbox
-    .submit(start_chat, [textbox, chatbot], [textbox, tabs, chatbot], queue=False, api_name="start_chat_textbox")
-    # queue must be set as False (default) so the process is not waiting for another to be finished
-    .then(chat, [textbox, chatbot, dropdown_sources, dropdown_reports, dropdown_category, dropdown_year], [chatbot, sources_textbox], queue=True, concurrency_limit=8, api_name="chat_textbox")
-    .then(finish_chat, None, [textbox], api_name="finish_chat_textbox"))
     (examples_hidden
         .change(start_chat, [examples_hidden, chatbot], [textbox, tabs, chatbot], queue=False, api_name="start_chat_examples")
         # queue must be set as False (default) so the process is not waiting for another to be finished
-        .then(chat, [examples_hidden, chatbot, dropdown_sources, dropdown_reports, dropdown_category, dropdown_year], [chatbot, sources_textbox], concurrency_limit=8, api_name="chat_examples")
-        .then(finish_chat, None, [textbox], api_name="finish_chat_examples")
-    )
     demo.queue()
-demo.launch()

 from auditqa.reader import nvidia_client, dedicated_endpoint
 from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template
 from dotenv import load_dotenv
+from threading import Lock
+import json
+from functools import partial
+# TESTING DEBUG LOG
+from auditqa.logging_config import setup_logging
+setup_logging()
+import logging
+logger = logging.getLogger(__name__)
 load_dotenv()
+# # fetch tokens and model config params
+# SPACES_LOG = os.environ["SPACES_LOG"]
+# model_config = getconfig("model_params.cfg")
+# # create the local logs repo
+# JSON_DATASET_DIR = Path("json_dataset")
+# JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
+# JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"
+# # the logs are written to dataset repo periodically from local logs
+# # https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
+# scheduler = CommitScheduler(
+#      repo_id="GIZ/spaces_logs",
+#      repo_type="dataset",
+#      folder_path=JSON_DATASET_DIR,
+#      path_in_repo="audit_chatbot",
+#      token=SPACES_LOG )
+# Make logging optional
+SPACES_LOG = os.getenv("SPACES_LOG", "app_interactions.jsonl")  # TESTING: local logging setup
 model_config = getconfig("model_params.cfg")
+# TESTING: local logging setup
+class LocalScheduler:
+    def __init__(self, filepath):
+        self.filepath = Path(filepath)
+        self.lock = Lock()
+        # Create the file if it doesn't exist
+        if not self.filepath.exists():
+            with self.filepath.open('w') as f:
+                f.write('')
+# Instead of HuggingFace CommitScheduler, use local scheduler
+scheduler = LocalScheduler(SPACES_LOG) # TESTING: local logging setup
+JSON_DATASET_PATH = Path(SPACES_LOG) # TESTING: local logging setup
 # the logs are written to dataset repo periodically from local logs
 # https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
 #####--------------- VECTOR STORE -------------------------------------------------
 # reports contain the already created chunks from Markdown version of pdf reports
 # We need to create the local vectorstore collection once using load_chunks
 # vectorestore colection are stored on persistent storage so this needs to be run only once
 # hence, comment out line below when creating for first time
+# vectorstores = load_new_chunks()
 # once the vectore embeddings  are created we will use qdrant client to access these
 vectorstores = get_local_qdrant()
 def finish_chat():
     return (gr.update(interactive = True,value = ""))
+def submit_feedback(feedback, logs_data):
+    """Handle feedback submission"""
+    try:
+        if logs_data is None:
+            logger.error("No logs data available for feedback")
+            return gr.update(visible=False), gr.update(visible=True)
+        save_logs(scheduler, JSON_DATASET_PATH, logs_data, feedback)
+        return gr.update(visible=False), gr.update(visible=True)
+    except Exception as e:
+        logger.error(f"Error saving feedback: {e}")
+        # Still need to return the expected outputs even on error
+        return gr.update(visible=False), gr.update(visible=True)
 async def chat(query,history,sources,reports,subtype,year):
     """taking a query and a message history, use a pipeline (reformulation, retriever, answering)
        to yield a tuple of:(messages in gradio format/messages in langchain format, source documents)
     vectorstore = vectorstores["allreports"]
     ##------------------------------get context----------------------------------------------
     context_retrieved = get_context(vectorstore=vectorstore,query=query,reports=reports,
                                                 sources=sources,subtype=subtype,year=year)
     context_retrieved_formatted = "||".join(doc.page_content for doc in context_retrieved)
     ##-----------------------get answer from endpoints------------------------------
     answer_yet = ""
+    # Create logs data structure at the beginning (so that feedback can be saved after streaming)
+    timestamp = str(datetime.now().timestamp())
+    logs_data = {
+        "system_prompt": SYSTEM_PROMPT,
+        "sources": sources,
+        "reports": reports,
+        "subtype": subtype,
+        "year": year,
+        "question": query,
+        "retriever": model_config.get('retriever','MODEL'),
+        "endpoint_type": model_config.get('reader','TYPE'),
+        "reader": model_config.get('reader','NVIDIA_MODEL'),
+        "docs": [doc.page_content for doc in context_retrieved],
+        "answer": "",  # Updated after streaming
+        "time": timestamp,
+    }
     if model_config.get('reader','TYPE') == 'NVIDIA':
         chat_model = nvidia_client()
         async def process_stream():
                     answer_yet += token
                     parsed_answer = parse_output_llm_with_sources(answer_yet)
                     history[-1] = (query, parsed_answer)
+                    # Update logs_data with current answer
+                    logs_data["answer"] = parsed_answer
+                    yield [tuple(x) for x in history], docs_html, logs_data
         # Stream the response updates
         async for update in process_stream():
             yield update
     else:
+        chat_model = dedicated_endpoint() # TESTING: ADAPTED FOR HF INFERENCE API
         async def process_stream():
+            nonlocal answer_yet
+            try:
+                formatted_messages = [
+                    {
+                        "role": msg.type if hasattr(msg, 'type') else msg.role,
+                        "content": msg.content
+                    }
+                    for msg in messages
+                ]
+                response = chat_model.chat_completion(
+                    messages=formatted_messages,
+                    max_tokens=int(model_config.get('reader', 'MAX_TOKENS'))
+                )
+                response_text = response.choices[0].message.content
+                words = response_text.split()
+                for word in words:
+                    answer_yet += word + " "
+                    parsed_answer = parse_output_llm_with_sources(answer_yet)
+                    history[-1] = (query, parsed_answer)
+                    # Update logs_data with current answer
+                    logs_data["answer"] = parsed_answer
+                    yield [tuple(x) for x in history], docs_html, logs_data
+                    await asyncio.sleep(0.05)
+            except Exception as e:
+                logger.error(f"Error in process_stream: {str(e)}")
+                raise
         async for update in process_stream():
             yield update
     try:
+        # Save log after streaming is complete
+        save_logs(scheduler, JSON_DATASET_PATH, logs_data)
     except Exception as e:
         logging.error(e)
+    #-------------------- Feedback UI elements + state management -------------------------
+    with gr.Row(visible=False) as feedback_row:
+        gr.Markdown("Was this response helpful?")
+        with gr.Row():
+            okay_btn = gr.Button("👍 Okay", elem_classes="feedback-button")
+            not_okay_btn = gr.Button("👎 Not to expectations", elem_classes="feedback-button")
+    feedback_thanks = gr.Markdown("Thanks for the feedback!", visible=False)
+    feedback_state = gr.State()  # Add state to store logs data
+    def show_feedback(logs):
+        """Show feedback buttons and store logs in state"""
+        return gr.update(visible=True), gr.update(visible=False), logs
+    def submit_feedback_okay(logs_data):
+        """Handle 'okay' feedback submission"""
+        return submit_feedback("okay", logs_data)
+    def submit_feedback_not_okay(logs_data):
+        """Handle 'not okay' feedback submission"""
+        return submit_feedback("not_okay", logs_data)
+    okay_btn.click(
+        submit_feedback_okay,
+        [feedback_state],
+        [feedback_row, feedback_thanks]
+    )
+    not_okay_btn.click(
+        submit_feedback_not_okay,
+        [feedback_state],
+        [feedback_row, feedback_thanks]
+    )
+    #-------------------- Gradio voodoo continued -------------------------
+    # Using event listeners for 1. query box 2. click on example question
     # https://www.gradio.app/docs/gradio/textbox#event-listeners-arguments
     (textbox
+        .submit(start_chat, [textbox, chatbot], [textbox, tabs, chatbot], queue=False, api_name="start_chat_textbox")
+        # queue must be set as False (default) so the process is not waiting for another to be finished
+        .then(chat, [textbox, chatbot, dropdown_sources, dropdown_reports, dropdown_category, dropdown_year], [chatbot, sources_textbox, feedback_state], queue=True, concurrency_limit=8, api_name="chat_textbox")
+        .then(show_feedback, [feedback_state], [feedback_row, feedback_thanks, feedback_state], api_name="show_feedback_textbox")
+        .then(finish_chat, None, [textbox], api_name="finish_chat_textbox"))
     (examples_hidden
         .change(start_chat, [examples_hidden, chatbot], [textbox, tabs, chatbot], queue=False, api_name="start_chat_examples")
         # queue must be set as False (default) so the process is not waiting for another to be finished
+        .then(chat, [examples_hidden, chatbot, dropdown_sources, dropdown_reports, dropdown_category, dropdown_year], [chatbot, sources_textbox, feedback_state], queue=True, concurrency_limit=8, api_name="chat_examples")
+        .then(show_feedback, [feedback_state], [feedback_row, feedback_thanks, feedback_state], api_name="show_feedback_examples")
+        .then(finish_chat, None, [textbox], api_name="finish_chat_examples"))
     demo.queue()
+demo.launch()
+logger.info("App launched")

auditqa/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/auditqa/__pycache__/__init__.cpython-310.pyc and b/auditqa/__pycache__/__init__.cpython-310.pyc differ

auditqa/__pycache__/process_chunks.cpython-310.pyc CHANGED Viewed

Binary files a/auditqa/__pycache__/process_chunks.cpython-310.pyc and b/auditqa/__pycache__/process_chunks.cpython-310.pyc differ

auditqa/__pycache__/reader.cpython-310.pyc CHANGED Viewed

Binary files a/auditqa/__pycache__/reader.cpython-310.pyc and b/auditqa/__pycache__/reader.cpython-310.pyc differ

auditqa/__pycache__/reports.cpython-310.pyc CHANGED Viewed

Binary files a/auditqa/__pycache__/reports.cpython-310.pyc and b/auditqa/__pycache__/reports.cpython-310.pyc differ

auditqa/__pycache__/retriever.cpython-310.pyc CHANGED Viewed

Binary files a/auditqa/__pycache__/retriever.cpython-310.pyc and b/auditqa/__pycache__/retriever.cpython-310.pyc differ

auditqa/__pycache__/sample_questions.cpython-310.pyc CHANGED Viewed

Binary files a/auditqa/__pycache__/sample_questions.cpython-310.pyc and b/auditqa/__pycache__/sample_questions.cpython-310.pyc differ

auditqa/__pycache__/utils.cpython-310.pyc CHANGED Viewed

Binary files a/auditqa/__pycache__/utils.cpython-310.pyc and b/auditqa/__pycache__/utils.cpython-310.pyc differ

auditqa/process_chunks.py CHANGED Viewed

@@ -11,10 +11,17 @@ from qdrant_client import QdrantClient
 from auditqa.reports import files, report_list
 from langchain.docstore.document import Document
 import configparser
 # read all the necessary variables
 device = 'cuda' if cuda.is_available() else 'cpu'
-path_to_data = "./reports/"
 ##---------------------functions -------------------------------------------##
@@ -118,7 +125,7 @@ def load_new_chunks():
     """
     this method reads through the files and report_list to create the vector database
     """
     #  we iterate through the files which contain information about its
     # 'source'=='category', 'subtype', these are used in UI for document selection
     #  which will be used later for filtering database
@@ -161,7 +168,7 @@ def load_new_chunks():
     qdrant_collections['allreports'] = Qdrant.from_documents(
                 all_documents,
                 embeddings,
-                path="/data/local_qdrant",
                 collection_name='allreports',
             )
     print(qdrant_collections)
@@ -169,14 +176,16 @@ def load_new_chunks():
     return qdrant_collections
 def get_local_qdrant():
-    """once the local qdrant server is created this is used to make the connection to exisitng server"""
     config = getconfig("./model_params.cfg")
     qdrant_collections = {}
     embeddings = HuggingFaceEmbeddings(
         model_kwargs = {'device': device},
         encode_kwargs = {'normalize_embeddings': True},
         model_name=config.get('retriever','MODEL'))
-    client = QdrantClient(path="/data/local_qdrant")
-    print("Collections in local Qdrant:",client.get_collections())
     qdrant_collections['allreports'] = Qdrant(client=client, collection_name='allreports', embeddings=embeddings, )
     return qdrant_collections

 from auditqa.reports import files, report_list
 from langchain.docstore.document import Document
 import configparser
+from pathlib import Path
 # read all the necessary variables
 device = 'cuda' if cuda.is_available() else 'cpu'
+path_to_data = "./reports/"
+# TESTING DEBUG LOG
+from auditqa.logging_config import setup_logging
+setup_logging()
+import logging
+logger = logging.getLogger(__name__)
 ##---------------------functions -------------------------------------------##
     """
     this method reads through the files and report_list to create the vector database
     """
+    logger.info("Loading new chunks")
     #  we iterate through the files which contain information about its
     # 'source'=='category', 'subtype', these are used in UI for document selection
     #  which will be used later for filtering database
     qdrant_collections['allreports'] = Qdrant.from_documents(
                 all_documents,
                 embeddings,
+                path="./data/local_qdrant",
                 collection_name='allreports',
             )
     print(qdrant_collections)
     return qdrant_collections
 def get_local_qdrant():
+    """once the local qdrant server is created this is used to make the connection to existing server"""
     config = getconfig("./model_params.cfg")
     qdrant_collections = {}
     embeddings = HuggingFaceEmbeddings(
         model_kwargs = {'device': device},
         encode_kwargs = {'normalize_embeddings': True},
         model_name=config.get('retriever','MODEL'))
+    # Change the path to a local directory
+    client = QdrantClient(path="./data/local_qdrant")
+    print("Collections in local Qdrant:", client.get_collections())
     qdrant_collections['allreports'] = Qdrant(client=client, collection_name='allreports', embeddings=embeddings, )
     return qdrant_collections

auditqa/reader.py CHANGED Viewed

@@ -7,38 +7,49 @@ import os
 from dotenv import load_dotenv
 load_dotenv()
-model_config = getconfig("model_params.cfg")
-NVIDIA_SERVER = os.environ["NVIDIA_SERVERLESS"]
-HF_token = os.environ["LLAMA_3_1"]
 def nvidia_client():
     """ returns the nvidia server client """
-    client = InferenceClient(
-    base_url=model_config.get('reader','NVIDIA_ENDPOINT'),
-    api_key=NVIDIA_SERVER)
-    print("getting nvidia client")
-    return client
 def dedicated_endpoint():
-    """ returns the dedicated server endpoint"""
-     # Set up the streaming callback handler
-    callback = StreamingStdOutCallbackHandler()
-    # Initialize the HuggingFaceEndpoint with streaming enabled
-    llm_qa = HuggingFaceEndpoint(
-        endpoint_url=model_config.get('reader', 'DEDICATED_ENDPOINT'),
-        max_new_tokens=int(model_config.get('reader','MAX_TOKENS')),
-        repetition_penalty=1.03,
-        timeout=70,
-        huggingfacehub_api_token=HF_token,
-        streaming=True, # Enable streaming for real-time token generation
-        callbacks=[callback] # Add the streaming callback handler
-    )
-    # Create a ChatHuggingFace instance with the streaming-enabled endpoint
-    chat_model = ChatHuggingFace(llm=llm_qa)
-    print("getting dedicated endpoint wrapped in ChathuggingFace ")
-    return chat_model

 from dotenv import load_dotenv
 load_dotenv()
+# TESTING DEBUG LOG
+from auditqa.logging_config import setup_logging
+setup_logging()
+import logging
+logger = logging.getLogger(__name__)
+model_config = getconfig("model_params.cfg")
+# NVIDIA_SERVER = os.environ["NVIDIA_SERVERLESS"]
+# HF_token = os.environ["LLAMA_3_1"]
+HF_token = os.getenv('LLAMA_3_1') # TESTING
 def nvidia_client():
+    logger.info("NVIDIA client activated")
     """ returns the nvidia server client """
+    try:
+        NVIDIA_SERVER = os.environ["NVIDIA_SERVERLESS"]
+        client = InferenceClient(
+            base_url=model_config.get('reader','NVIDIA_ENDPOINT'),
+            api_key=NVIDIA_SERVER)
+        print("getting nvidia client")
+        return client
+    except KeyError:
+        raise KeyError("NVIDIA_SERVERLESS environment variable not set. Required for NVIDIA endpoint.")
+# TESTING VERSION
 def dedicated_endpoint():
+    logger.info("Serverless endpoint activated")
+    try:
+        hf_api_key = os.environ["LLAMA_3_1"]
+        if not hf_api_key:
+            raise ValueError("LLAMA_3_1 environment variable is empty")
+        model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+        logger.info(f"Initializing InferenceClient with model: {model_id}")
+        client = InferenceClient(
+            model=model_id,
+            api_key=hf_api_key,
+        )
+        logger.info("Serverless InferenceClient initialization successful")
+        return client
+    except Exception as e:
+        logger.error(f"Error initializing dedicated endpoint: {str(e)}")
+        raise

auditqa/retriever.py CHANGED Viewed

@@ -4,6 +4,12 @@ from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import CrossEncoderReranker
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
 model_config = getconfig("model_params.cfg")
 def create_filter(reports:list = [],sources:str =None,
@@ -35,9 +41,12 @@ def create_filter(reports:list = [],sources:str =None,
     return filter
 def get_context(vectorstore,query,reports,sources,subtype,year):
     # create metadata filter
-    filter = create_filter(reports=reports,sources=sources,subtype=subtype,year=year)
     # getting context
     retriever = vectorstore.as_retriever(search_type="similarity_score_threshold",
@@ -50,7 +59,9 @@ def get_context(vectorstore,query,reports,sources,subtype,year):
     compression_retriever = ContextualCompressionRetriever(
             base_compressor=compressor, base_retriever=retriever
         )
     context_retrieved = compression_retriever.invoke(query)
     print(f"retrieved paragraphs:{len(context_retrieved)}")
     return context_retrieved

 from langchain.retrievers.document_compressors import CrossEncoderReranker
 from langchain_community.cross_encoders import HuggingFaceCrossEncoder
+# TESTING DEBUG LOG
+from auditqa.logging_config import setup_logging
+setup_logging()
+import logging
+logger = logging.getLogger(__name__)
 model_config = getconfig("model_params.cfg")
 def create_filter(reports:list = [],sources:str =None,
     return filter
 def get_context(vectorstore,query,reports,sources,subtype,year):
+    logger.info("Retriever activated")
     # create metadata filter
+    # filter = create_filter(reports=reports,sources=sources,subtype=subtype,year=year)
+    filter = None
     # getting context
     retriever = vectorstore.as_retriever(search_type="similarity_score_threshold",
     compression_retriever = ContextualCompressionRetriever(
             base_compressor=compressor, base_retriever=retriever
         )
     context_retrieved = compression_retriever.invoke(query)
+    logger.info(f"retrieved paragraphs:{len(context_retrieved)}")
     print(f"retrieved paragraphs:{len(context_retrieved)}")
     return context_retrieved

auditqa/utils.py CHANGED Viewed

@@ -6,10 +6,14 @@ from langchain.schema import (
     SystemMessage,
 )
-def save_logs(scheduler, JSON_DATASET_PATH, logs) -> None:
     """ Every interaction with app saves the log of question and answer,
-        this is to get the usage statistics of app and evaluate model performances
     """
     with scheduler.lock:
         with JSON_DATASET_PATH.open("a") as f:
             json.dump(logs, f)

     SystemMessage,
 )
+def save_logs(scheduler, JSON_DATASET_PATH, logs, feedback=None) -> None:
     """ Every interaction with app saves the log of question and answer,
+        this is to get the usage statistics of app and evaluate model performances.
+        Also saves user feedback (when provided).
     """
+    if feedback:
+        logs["feedback"] = feedback #optional
     with scheduler.lock:
         with JSON_DATASET_PATH.open("a") as f:
             json.dump(logs, f)

model_params.cfg CHANGED Viewed

@@ -6,9 +6,9 @@ TOP_K = 20
 MODEL = BAAI/bge-reranker-base
 TOP_K = 3
 [reader]
-TYPE = NVIDIA
 DEDICATED_MODEL = meta-llama/Llama-3.1-8B-Instruct
 DEDICATED_ENDPOINT = https://qu2d8m6dmsollhly.us-east-1.aws.endpoints.huggingface.cloud
 NVIDIA_MODEL = meta-llama/Llama-3.1-8B-Instruct
 NVIDIA_ENDPOINT = https://huggingface.co/api/integrations/dgx/v1
-MAX_TOKENS = 512

 MODEL = BAAI/bge-reranker-base
 TOP_K = 3
 [reader]
+TYPE = DEDICATED
 DEDICATED_MODEL = meta-llama/Llama-3.1-8B-Instruct
 DEDICATED_ENDPOINT = https://qu2d8m6dmsollhly.us-east-1.aws.endpoints.huggingface.cloud
 NVIDIA_MODEL = meta-llama/Llama-3.1-8B-Instruct
 NVIDIA_ENDPOINT = https://huggingface.co/api/integrations/dgx/v1
+MAX_TOKENS = 256

style.css CHANGED Viewed

@@ -1,4 +1,3 @@
 /* :root {
     --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
   } */
@@ -360,3 +359,15 @@ span.chatbot > p > img{
 .a-doc-ref{
 	text-decoration: none !important;
 }

 /* :root {
     --user-image: url('https://ih1.redbubble.net/image.4776899543.6215/st,small,507x507-pad,600x600,f8f8f8.jpg');
   } */
 .a-doc-ref{
 	text-decoration: none !important;
 }
+.feedback-button {
+    border: none;
+    padding: 8px 16px;
+    border-radius: 4px;
+    cursor: pointer;
+    transition: background-color 0.3s;
+}
+.feedback-button:hover {
+    opacity: 0.8;
+}