mrfirdauss committed on
Commit
18a508e
·
1 Parent(s): cb77609

init: init repo

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ *.faiss filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -8,11 +8,17 @@ RUN apt-get update && apt-get install -y \
8
  git \
9
  && rm -rf /var/lib/apt/lists/*
10
 
 
 
 
 
11
  COPY requirements.txt ./
12
  COPY src/ ./src/
13
 
14
  RUN pip3 install -r requirements.txt
15
 
 
 
16
  EXPOSE 8501
17
 
18
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
8
  git \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
+ RUN curl -fsSL https://ollama.com/install.sh | sh
12
+
13
+ RUN ollama serve --detach &
14
+
15
  COPY requirements.txt ./
16
  COPY src/ ./src/
17
 
18
  RUN pip3 install -r requirements.txt
19
 
20
+ RUN ollama pull qwen3:4b
21
+
22
  EXPOSE 8501
23
 
24
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
fraudTrainData.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd3363405c1efbd9862c0b6c4ebebdac1fc9aff175063847c4cda60dc8c50f32
3
+ size 254853611
requirements.txt CHANGED
@@ -1,3 +1,9 @@
1
- altair
2
  pandas
3
- streamlit
 
 
 
 
 
 
 
1
+ ollama
2
  pandas
3
+ streamlit
4
+ matplotlib
5
+ langchain-community
6
+ faiss-cpu
7
+ pypdf
8
+ sentence-transformers
9
+ huggingface_hub
src/FinancialAgent.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from abc import ABC, abstractmethod
import io
import pickle

import matplotlib.pyplot as plt  # BUG FIX: `from matplotlib.pyplot import plt` is not a real name
import numpy as np
import pandas as pd
import streamlit as st

from models import ResponseState  # BUG FIX: ResponseState lives in models, not prompt
from prompt import FINAL_PROMPT


class FinancialAgentFactory(ABC):
    """Abstract Factory for Streamlit-rendered financial chat agents.

    Supplies the shared chat plumbing — session state, message rendering,
    dataframe-context handling, and final-answer streaming. Concrete
    subclasses provide the model calls via ``__stream_answer__`` and
    ``process_prompt``.
    """

    def __init__(self, st, model_name="gpt-4o"):
        # `st` is the injected streamlit module, kept on the instance so all
        # methods share one handle (the original mixed `self.st` and module `st`).
        self.st = st
        # NOTE(review): unpickles a repo-local file at construction time;
        # pickle.load is only safe while this file is trusted.
        with open("fraudTrainData.pkl", "rb") as fh:  # BUG FIX: file was never closed
            self.df = pickle.load(fh)
        self.model_name = model_name

        if "messages" not in self.st.session_state:
            self.st.session_state.messages = []
            self.st.session_state["openai_model"] = self.model_name

    # BUG FIX: render_header/render_messages carried @abstractmethod while
    # having concrete bodies, and FinancialAgentOllama never overrides them —
    # that left the subclass abstract and uninstantiable. They are now plain
    # overridable methods with their original default behavior.
    def render_header(self, header="Financial Agent"):
        """Render the page title."""
        self.st.title(header)

    def render_messages(self):
        """Render previous chat messages stored in session state."""
        for message in self.st.session_state.messages:
            with self.st.chat_message(message["role"]):
                self.st.markdown(message["content"])

    @abstractmethod
    def __stream_answer__(self, instructions, input_messages):
        """Yield answer chunks from the model. Subclasses must implement."""

    @abstractmethod
    def process_prompt(self, prompt):
        """Main pipeline for a new user input. Subclasses must implement."""

    def __safe_savefig__(self, *args, **kwargs):
        """Save the current matplotlib figure into an in-memory PNG buffer."""
        # BUG FIX: the original omitted an explicit `self`, silently consuming
        # the instance through *args; extra args are accepted and ignored so
        # model-generated `savefig(...)` calls never crash.
        buf = io.BytesIO()
        plt.savefig(buf, format="png")
        buf.seek(0)
        return buf

    def __handle_context__(self, response_state: ResponseState) -> str:
        """Build the extra context prompt (dataframe insights, optional chart)."""
        context_prompt = ""
        if response_state.contextType in ("data", "both"):
            local_scope = {"df": self.df, "np": np, "pd": pd, "plt": plt,
                           "savefig": self.__safe_savefig__}
            # SECURITY: executes model-generated code against the dataframe.
            # Acceptable only for a trusted local demo — never expose publicly.
            exec(response_state.code, {}, local_scope)

            fig = plt.gcf()
            if fig.get_axes():  # a chart was generated
                with self.st.chat_message("assistant"):
                    self.st.pyplot(fig)
                plt.close(fig)

            context_prompt = "## CONTEXT DATAFRAME.\n"
            context_prompt += str(local_scope.get("result", ""))

        # Placeholder for PDF or other context handling
        # elif response_state.contextType in ("pdf", "both"):
        #     context_prompt = "Provide the relevant information from the PDF documents."

        return context_prompt

    def generate_final_answer(self, context_prompt: str):
        """Generate and stream the final answer with the gathered context."""
        with self.st.chat_message("assistant"):
            answer = self.st.write_stream(
                self.__stream_answer__(
                    instructions=FINAL_PROMPT,
                    input_messages=[
                        {"role": m["role"], "content": m["content"]}
                        for m in self.st.session_state.messages
                    ] + [{"role": "user", "content": context_prompt}],
                )
            )
        self.st.session_state.messages.append({"role": "assistant", "content": answer})

    def display_final_answer(self, answer: str):
        """Display a non-streamed assistant answer and persist it."""
        self.st.session_state.messages.append({"role": "assistant", "content": answer})
        with self.st.chat_message("assistant"):
            self.st.markdown(answer)

    def run(self):
        """Run the app: header, chat history, then process any new input."""
        self.render_header()
        self.render_messages()

        if prompt := self.st.chat_input("What is up?"):
            self.process_prompt(prompt)
src/FinancialAgentOllama.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from ollama import chat
from FinancialAgent import FinancialAgentFactory
from prompt import REFINERY_PROMPT
from models import ResponseState
import numpy as np
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
from langchain_community.vectorstores import FAISS


class FinancialAgentOllama(FinancialAgentFactory):
    """Concrete Financial Agent backed by a local Ollama model and a FAISS PDF index."""

    def __init__(self, st, model_name="deepseek-r1:8b", embedding=None):
        super().__init__(st, model_name)
        self.client = chat
        # allow_dangerous_deserialization unpickles the local index files —
        # only safe because the index ships with this repo; never point it at
        # untrusted data.
        self.vector_db = FAISS.load_local(
            "vs_68bf713eea2c81919ac08298a05d6704",
            embedding,
            allow_dangerous_deserialization=True,
        )

    def __stream_answer__(self, instructions, input_messages):
        """Yield streamed content chunks from the Ollama chat endpoint."""
        # BUG FIX: the ollama client takes `messages=`, not `message=`.
        response_stream = self.client(
            messages=input_messages + [{"role": "user", "content": instructions}],
            model=self.model_name,
            stream=True,
        )
        for chunk in response_stream:
            yield chunk.message.content

    def generate_final_answer(self, context_prompt):
        """Stream the final answer, persist it, then rerun to refresh the UI."""
        with self.st.chat_message("assistant"):
            answer = self.st.write_stream(
                self.__stream_answer__(
                    context_prompt,
                    [{"role": m["role"], "content": m["content"]}
                     for m in self.st.session_state.messages],
                )
            )

        self.st.session_state.messages.append({"role": "assistant", "content": answer})
        # BUG FIX: st.experimental_rerun() was removed in modern Streamlit;
        # st.rerun() is the supported replacement.
        self.st.rerun()

    def __handle_context__(self, response_state: ResponseState) -> str:
        """Build context from the dataframe and/or the PDF vector store."""
        context_prompt = ""
        if response_state.contextType in ("data", "both"):
            local_scope = {"df": self.df, "np": np, "pd": pd, "plt": plt,
                           "savefig": self.__safe_savefig__}
            # SECURITY: executes model-generated code — trusted local demo only.
            exec(response_state.code, {}, local_scope)

            fig = plt.gcf()
            if fig.get_axes():  # a chart was generated
                with self.st.chat_message("assistant"):
                    self.st.pyplot(fig)
                plt.close(fig)

            context_prompt = "## CONTEXT DATAFRAME.\n"
            context_prompt += str(local_scope.get("result", ""))

        if response_state.contextType in ("pdf", "both"):
            context_prompt += "## CONTEXT PDF.\n"
            # `retriverKey` spelling matches the ResponseState schema field.
            results = self.vector_db.similarity_search(response_state.retriverKey, k=5)
            for i, doc in enumerate(results, 1):
                context_prompt += f"### Document {i}\n{doc.page_content}\n"
        return context_prompt

    def process_prompt(self, prompt):
        """Main pipeline: echo the user turn, refine it, then answer (with context if needed)."""
        self.st.session_state.messages.append({"role": "user", "content": prompt})
        with self.st.chat_message("user"):
            self.st.markdown(prompt)

        # Step 1: refinery call decides whether extra context is required and
        # returns a structured ResponseState.
        response = self.client(
            # BUG FIX: `messages=`, not `message=`.
            messages=[{"role": m["role"], "content": m["content"]}
                      for m in self.st.session_state.messages] +
                     [{"role": "user", "content": REFINERY_PROMPT.format(
                         response_format=ResponseState.model_json_schema(),
                         df_head=self.df.head().to_markdown(),
                         df_columns=self.df.columns.tolist(),
                         df_sample=self.df.sample(5).to_markdown(),
                     )}],
            model=self.model_name,
            stream=False,
            # BUG FIX: `format` expects a JSON schema dict (structured outputs),
            # not the Pydantic class object itself.
            format=ResponseState.model_json_schema(),
        )

        response_state: ResponseState = ResponseState.model_validate_json(response.message.content)

        # Step 2: answer directly, or gather context first and stream with it.
        if response_state.isNeedContext:
            context_prompt = self.__handle_context__(response_state)
            self.generate_final_answer(context_prompt)
        else:
            self.display_final_answer(response_state.response)
src/models.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel, Field


class ResponseState(BaseModel):
    """Structured output of the refinery step.

    Tells the pipeline whether extra context is needed, of what kind, and how
    to obtain it (a retrieval query and/or dataframe code to execute).

    NOTE: field names — including the `retriverKey` misspelling — are part of
    the JSON schema sent to the model and referenced by call sites; do not
    rename them.
    """

    # Whether the model needs additional context from the dataframe or the PDFs.
    isNeedContext: bool = Field(default=False, description="Whether the model needs additional context from the dataframe or the PDF documents")
    # Direct answer, used when no extra context is required.
    response: str = Field(default="", description="The direct answer from the model when no additional context is needed")
    # Kind of context to fetch when isNeedContext is true.
    contextType: str = Field(default="", description="If isNeedContext is true, the type of context needed: 'data', 'pdf', or 'both'")
    # Plain-text similarity-search query for the PDF vector store.
    retriverKey: str = Field(default="", description="If isNeedContext is true and contextType is 'pdf' or 'both', the plain-text query used to retrieve context from the PDF vector database")
    # Python executed against the preloaded dataframe `df`; must leave its
    # findings in a string variable named `result`.
    code: str = Field(default="", description="Python code to run against the already-loaded dataframe named df; insert all necessary info into a string variable named result")
src/prompt.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Refinery-step system prompt. Placeholders filled by process_prompt():
# {response_format} (JSON schema), {df_head}, {df_sample}; df_columns is also
# passed to .format() but intentionally has no placeholder here.
# Typos fixed ("SAFE PLOT", "Busienss", "dont", "might not available", ...)
# since this text is sent verbatim to the model.
REFINERY_PROMPT = """
### PROMPT:
YOU ARE A FINANCIAL EXPERT. I WANT YOU TO ACT AS A DATA ANALYST AND ASSIST ME IN UNDERSTANDING AND INTERPRETING THE DATA PROVIDED.
BASED ON INPUT, I WANT YOU TO REFINE THE USER'S QUESTION AND DETERMINE IF YOU NEED ADDITIONAL CONTEXT FROM DATA OR PDF DOCUMENTS TO ANSWER THE QUESTION.
IF YOU NEED CONTEXT, PLEASE SPECIFY THE TYPE OF CONTEXT NEEDED ('data', 'pdf', OR 'both') AND PROVIDE THE CODE TO EXECUTE TO GAIN INSIGHTS FROM THE DATA.
IF YOU WANT TO SAVE A PLOT, USE THE PROVIDED 'savefig()' FUNCTION TO SAVE THE PLOT AND RETURN THE BUFFER.
IF YOU DO NOT NEED ADDITIONAL CONTEXT, PLEASE PROVIDE A DIRECT ANSWER TO THE USER'S QUESTION.
PLEASE RESPOND IN JSON FORMAT WITH THE FOLLOWING.

### Response Format:
{response_format}

### Context:
#### Dataframe
This is a credit card transaction dataset containing legitimate and fraud transactions from the duration 1st Jan 2019 - 31st Dec 2020. It covers credit cards of 1000 customers doing transactions with a pool of 800 merchants.
The data is located in the USA and the currency used is USD. If another country or area is asked about, please answer that the data only covers the USA.
The latest data might not be available, so please answer based on the data available.
table head, columns, and sample data:
dataframe head:
{df_head}

dataframe columns type:
index - Unique Identifier for each row
cc_num - Credit Card Number of Customer
trans_date_trans_time - Transaction DateTime
merchant - Merchant Name
category - Category of Merchant
amt - Amount of Transaction
first - First Name of Credit Card Holder
last - Last Name of Credit Card Holder
gender - Gender of Credit Card Holder
street - Street Address of Credit Card Holder
city - City of Credit Card Holder
state - State of Credit Card Holder
zip - Zip of Credit Card Holder
lat - Latitude Location of Credit Card Holder
long - Longitude Location of Credit Card Holder
city_pop - Credit Card Holder's City Population
job - Job of Credit Card Holder
dob - Date of Birth of Credit Card Holder
trans_num - Transaction Number
unix_time - UNIX Time of transaction
merch_lat - Latitude Location of Merchant
merch_long - Longitude Location of Merchant
is_fraud - Fraud Flag

dataframe sample data:
{df_sample}

#### PDF Document
File PDF summary:
Understanding Credit Card Frauds, Card Business Review by Tata Consulting Service:
This paper was released in 2003. The latest data are not available. If required, please answer that you don't have the data.
This paper contains worldwide snapshot data from before 2003 about credit card fraud.
Card fraud is a major global threat, particularly in online "card-not-present" transactions where fraud rates far exceed in-person purchases.
Common techniques include lost/stolen cards, identity theft, counterfeit cards, skimming, and internet schemes such as site cloning and false merchant sites. While cardholders are typically protected by law, merchants bear the highest costs through chargebacks, penalties, and reputational damage, with banks also incurring significant prevention expenses.
Effective management requires a layered approach: verification systems (AVS, CVV, payer authentication), blacklists/whitelists, and advanced methods like risk scoring, neural networks, biometrics, and smart cards. The key challenge is balancing fraud losses with the cost of prevention to minimize the total cost of fraud while maintaining trust in the payment ecosystem.

Table of Contents:
Overview

Introduction
2.1. Purpose of this Paper

Current State of the Industry

How Fraud is Committed Worldwide

Fraud Techniques
5.1. Card-Related Frauds
- Application Fraud
- Lost / Stolen Cards
- Account Takeover
- Fake and Counterfeit Cards
5.2. Merchant-Related Frauds
- Merchant Collusion
- Triangulation
5.3. Internet-Related Frauds

Impact of Credit Card Frauds
6.1. Impact on Cardholders
6.2. Impact on Merchants
6.3. Impact on Banks (Issuer / Acquirer)

Fraud Prevention and Management
7.1. Fraud Prevention Technologies
- Manual Review
- Address Verification System (AVS)
- Card Verification Methods
- Negative and Positive Lists
- Payer Authentication
- Lockout Mechanisms
- Fraudulent Merchants
7.2. Recent Developments in Fraud Management
- Simple Rule Systems
- Risk Scoring Technologies
- Neural Network Technologies
- Biometrics
- Smart Cards

Managing the Total Cost of Fraud
"""
103
+
104
+
105
# Final-answer system prompt appended after the chat history and context.
# BUG FIX: the original opened with four quotes (`""""`), which put a stray
# literal `"` at the start of the prompt text; grammar/typos ("CONTEX") fixed.
FINAL_PROMPT = """
You are a financial expert. Use the provided context to answer the user's question.
IF THE CONTEXT IS INSUFFICIENT, ANSWER WITH 'Insufficient context to answer the question.' AND EXPLAIN WHAT CONTEXT IS MISSING — DO NOT MAKE UP AN ANSWER.
"""
src/streamlit_app.py CHANGED
@@ -1,40 +1,14 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
 
 
 
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
import streamlit as st
from FinancialAgentOllama import FinancialAgentOllama
# BUG FIX: `langchain.embeddings` is the deprecated path; the repo's
# requirements install langchain-community, which is the supported location.
from langchain_community.embeddings import HuggingFaceEmbeddings
from huggingface_hub import hf_hub_download

# Fetch the FAISS index shard so it sits next to the committed index.pkl and
# FAISS.load_local("vs_68bf713eea2c81919ac08298a05d6704", ...) finds both files.
# BUG FIX: repo_id must not carry a trailing slash, and without local_dir the
# download lands in the HF cache — a path the agent never reads.
index_path = hf_hub_download(
    repo_id="mrfirdauss/FaissBhatlaBook",
    filename="vs_68bf713eea2c81919ac08298a05d6704/index.faiss",
    local_dir=".",
)

app = FinancialAgentOllama(
    st,
    model_name="qwen3:4b",
    embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
)

app.run()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vs_68bf713eea2c81919ac08298a05d6704/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ed1be49bcb8d019522a1838992eaad2c3fd5f8ed62c4da9b6c8cee3f17bfc78
3
+ size 69695