% mBA-Terminal / market_profile_paper.tex
%
% Upload 29 files
%
% c99df4c verified 4 days ago
%
% 32.1 kB

	\documentclass[conference]{IEEEtran}

	% ─── Packages ───────────────────────────────────────────
	\usepackage[utf8]{inputenc}
	\usepackage[T1]{fontenc}
	\usepackage{amsmath, amssymb, amsfonts}
	\usepackage{graphicx}
	\usepackage{booktabs}
	\usepackage{hyperref}
	\usepackage{float}
	\usepackage{caption}
	\usepackage{subcaption}
	\usepackage{xcolor}
	\usepackage{enumitem}
	\usepackage{cite}
	\usepackage{array}
	\usepackage{url}

	\hypersetup{
	colorlinks=true,
	linkcolor=blue!70!black,
	citecolor=blue!70!black,
	urlcolor=blue!60!black,
	}

	% ─── Title ──────────────────────────────────────────────
	\title{mBA-Profile: Market Profile Construction from Microsecond Bid-Ask Unit Data Using The Path-weighted Gap-filling Approach}

	\author{
	\IEEEauthorblockN{
	Rembrant Oyangoren Albeos~%
	\href{https://orcid.org/0009-0006-8743-4419}{%
	\includegraphics[height=8pt]{ORCID_icon.png}%
	}%
	\textsuperscript{\hyperref[sec:author_info]{$\dagger$}}
	}
	\IEEEauthorblockA{%
	\includegraphics[height=7pt]{ContinualQuasars_icon.png}\hspace{0.4em}Continual Quasars\\
	}
	}

	\begin{document}
	\maketitle

	% ════════════════════════════════════════════════════════
	\begin{abstract}
	Conventional market profile construction collapses raw price data
	directly into a Y-distribution histogram, recording only the price
	levels that were explicitly quoted by the exchange or broker feed.
	This paper presents an alternative approach---termed
	\emph{path-weighted gap-filling}---in which synthetic trail-datapoints
	are inserted at every intermediate unit-level price between
	consecutive observations, producing an extended dataset that yields a
	substantially denser and more continuous market profile. The
	modelling is grounded in microsecond-resolution raw bid/ask unit data
	rather than aggregated TOHLC (time, open, high, low, close) bars or
	volume figures, thereby preserving the highest available fidelity of
	the underlying price process. We demonstrate the approach on a full
	trading day of \texttt{XAUUSDc} data collected from a live trading
	environment, and show that the gap-filled profile eliminates the empty
	bins and sparse regions that afflict raw-unit profiles during fast
	directional moves, producing a more representative picture of
	intraday price dynamics. All resources and code used in this work are available on GitHub at \url{https://github.com/ContinualQuasars/mBA-Profile}.

	\end{abstract}

	\begin{IEEEkeywords}
	Market profile, unit data, microsecond bid--ask, market microstructure, path-weighted, gap-filling.
	\end{IEEEkeywords}

	% ════════════════════════════════════════════════════════
	\section{Introduction}
	\label{sec:intro}

	\subsection{Market Microstructure and Unit Data}

	At the most granular level of market data, financial instruments are
	quoted through discrete data updates: each update represents a change in
	the best bid price, the best ask price, or both
	simultaneously~\cite{hasbrouck2007,ohara1995}. Such updates are
	commonly called \emph{ticks}; this paper uses \emph{unit} and
	\emph{tick} interchangeably. Modern trading
	platforms such as MetaTrader~5 (MT5) record these events with
	millisecond-resolution timestamps, and the data is made available
	through a Python API~\cite{mt5docs}.

	The instrument studied in this paper is \texttt{XAUUSDc}, a
	gold CFD (Contract for Difference) traded on a
	standard cent live trading account provided by the Exness broker,
	accessed through the MT5 platform. The \texttt{c} suffix in
	\texttt{XAUUSDc} is an Exness broker account-type indicator
	(denoting a standard cent live account) and has no bearing on the
	XAU price data itself---extracting data from \texttt{XAUUSDc}
	(cent account) or \texttt{XAUUSDm} (dollar account) yields the
	same XAUUSD price data with three decimal places. The minimum price
	increment for this instrument is exactly \$0.001.
	Because the standard lot size for gold is 100 troy ounces~\cite{cmegroup2024}, a single
	price movement of \$0.001 corresponds to a profit-or-loss change of
	\$0.10 per standard lot. In this study, the market profile
	bin size is set to \$0.01 (0.01~unit, where 0.01~unit = \$0.01 XAU
	price change), to produce a more
	stable and interpretable distribution.

	\subsection{The Market Profile Concept}

	A market profile is a rotated histogram of price over a defined time
	window. The concept was introduced by J.~Peter Steidlmayer at the
	Chicago Board of Trade in the 1980s~\cite{steidlmayer1986}.
	Traditionally, a market profile uses 30-minute ``Time Price
	Opportunity'' (TPO) letters stacked at each price level to show where
	price spent the most time during a trading session~\cite{dalton2007}.
	The horizontal axis represents frequency or time density, while the
	vertical axis represents price.

	In this study, the concept is adapted to microsecond unit data.
	Instead of 30-minute TPO letters, each histogram bar represents the
	number of data updates (or interpolated price levels, in the
	gap-filled approach) observed at that price. The construction is
	based exclusively on raw bid/ask unit data---not on TOHLC candles or
	volume bars---ensuring that no information is lost to
	aggregation~\cite{ane2000,engle2000}.

	\subsection{Paper Outline}

	Section~\ref{sec:data} describes the data acquisition pipeline and
	the dataset used. Section~\ref{sec:raw} details the raw unit
	approach. Section~\ref{sec:filled} introduces the gap-filled
	(path-weighted) approach, including a detailed explanation of why
	path-weighting is used. Section~\ref{sec:comparison} provides a
	comprehensive comparison of the two approaches.
	Section~\ref{sec:conclusion} concludes.


	% ════════════════════════════════════════════════════════
	\section{Data Acquisition}
	\label{sec:data}

	\subsection{Trading Environment}

	The unit data used in this study was collected from a standard cent
	live trading account on the Exness broker, accessed through MetaTrader~5.
	MT5 is a multi-asset trading platform developed by MetaQuotes Software
	Corp.\ that is widely used for forex and CFD
	trading~\cite{mt5docs,metaquotes2024}. Its Python integration exposes
	the function \texttt{copy\_ticks\_range()}, which returns every data
	update within a specified time window as a structured NumPy
	array~\cite{numpy2020}. Each data record contains the following
	fields: a Unix timestamp in seconds, a millisecond-precision timestamp
	providing sub-second resolution, the best bid price, the best ask
	price, and additional metadata including flags indicating which fields
	changed on that particular update.

	Although the exposed timestamp has millisecond granularity, the MT5
	documentation describes the system as operating at microsecond
	internal resolution~\cite{mt5docs}; the millisecond field is what is
	exposed through the Python API.

	\subsection{Dataset Summary}

	The symbol is \texttt{XAUUSDc}. The time range covers the full UTC
	day of February~12, 2026, from 00:00:00 to 23:59:59. The flag used
	retrieves all data updates regardless of whether the bid, ask, or last
	price changed.

	The query returned exactly \textbf{393,252~data points}. The first data point was
	recorded at \textbf{2026-02-12 00:00:00.149~UTC} and the last data point at
	\textbf{2026-02-12 23:59:57.820~UTC}. The bid price ranged from a
	low of \textbf{\$4,878.380} to a high of \textbf{\$5,083.750}, a span
	of \textbf{\$205.370} (20,537~units). The ask price ranged from
	\textbf{\$4,878.620} to \textbf{\$5,083.990}, a span of
	\textbf{\$205.370} (20,537~units).

	\subsection{Unit Size}

	The unit size for \texttt{XAUUSDc} is \textbf{\$0.010} (0.01~unit,
	where 0.01~unit = \$0.01 XAU price change).
	This value is determined by the broker's symbol specification and is
	not configurable by the user. The lowest price resolution of XAU is
	three decimal places: a change of 0.001 corresponds to a
	\$0.001 price movement. Throughout this paper, $\delta = 0.010$
	denotes the unit size, and the bin width used for histogram
	construction equals 0.01~unit ($w = \delta = 0.010$).


	% ════════════════════════════════════════════════════════
	\section{Approach~1: Raw Unit Y-Distribution}
	\label{sec:raw}

	\subsection{Methodology}

	The raw unit approach constructs a market profile histogram directly
	from the 393,252 observed unit prices without any interpolation or
	modification. The procedure begins by extracting the bid and ask
	columns as separate arrays from the dataset. Histogram bin edges are
	computed starting from
	$\lfloor p_{\min}/\delta \rfloor \cdot \delta - \delta$ up to
	$\lceil p_{\max}/\delta \rceil \cdot \delta + \delta$, spaced by
	exactly $\delta = 0.010$. This ensures that every observed price
	falls cleanly within a bin whose width is exactly 0.01~unit. Bin edges
	are rounded to avoid floating-point precision
	artefacts~\cite{goldberg1991}.

	A standard frequency histogram is then computed---the count of data points
	whose price falls within each bin---separately for bid and ask. The
	histogram is plotted horizontally, with price on the vertical axis and
	count on the horizontal axis, creating the conventional market-profile
	appearance where the thickest region corresponds to the price level
	that received the most data updates.

	\subsection{Feature Engineering}

	The feature engineering pipeline for the raw approach consists of the
	following stages. First, the raw unit data from MT5 (a structured
	array) is converted into a tabular format. The millisecond-precision
	timestamp column is transformed into a UTC-aware datetime
	representation. Next, the datetime values are converted to
	floating-point date numbers suitable for high-performance
	plotting~\cite{matplotlib2007}. This pre-conversion is performed once
	before plotting because passing raw datetime objects to the plotting
	library triggers an internal per-element conversion that is extremely
	slow for arrays of 393,252 elements---the pre-conversion reduces
	plotting time from several minutes to under one minute for the full
	dataset.

	The histogram bin edges are constructed using a range function with
	step size equal to $\delta$ and then rounded. For the observed data
	range of \$4,878.380 to \$5,083.990, this produces 20,563 bin edges
	defining 20,562 bins, each exactly \$0.010 wide (0.01~unit).

	\subsection{Output}

	The output is a $2 \times 2$ subplot figure. The top row displays the
	bid data: a horizontal histogram on the left (blue) and a time-series
	line chart on the right (blue). The bottom row displays the ask data
	in the same layout using red. The two rows share their respective
	Y-axes so that price levels align horizontally between the histogram
	and the line chart.

	\begin{figure*}[t]
	\centering
	\includegraphics[width=\textwidth]{raw_ticks_4panel.png}
	\caption{Raw unit Y-distribution histograms (left column) and
	time-series line charts (right column) for bid (top, blue) and ask
	(bottom, red) prices of \texttt{XAUUSDc} on February~12, 2026.
	The dataset contains 393,252 data points. Bin size = 0.01~unit (\$0.010).
	The histogram X-axis shows the count of data points observed at each
	price level.}
	\label{fig:raw_4panel}
	\end{figure*}

	\subsection{Interpretation}

	In the raw histogram (Figure~\ref{fig:raw_4panel}), the count at each
	price level reflects how many times the market's best bid or best ask
	was updated to that exact price. Levels where the market
	consolidated---spending extended time with many small quote
	updates---accumulate high counts and form the thick horizontal bars in
	the profile~\cite{dalton2007}.

	However, when the market jumps from price $A$ to price $B$ in a single
	update without quoting any intermediate level, those intermediate levels
	receive zero counts in the histogram. The raw profile therefore
	contains \emph{gaps}---entire price levels with no
	representation---that correspond to fast directional moves. This is a
	fundamental limitation: the profile faithfully records only what was
	quoted, but it does not capture the price path traversed between
	observations. This motivates the gap-filled approach presented in
	Section~\ref{sec:filled}.


	% ════════════════════════════════════════════════════════
	\section{Approach~2: Gap-Filled (Path-Weighted) Y-Distribution}
	\label{sec:filled}

	\subsection{Motivation}

	Consider a scenario where the bid price moves from \$5,060.000 to
	\$5,060.100 in a single update. In the raw approach, only two price
	levels---\$5,060.000 and \$5,060.100---register a count, while the
	nine intermediate levels (\$5,060.010 through \$5,060.090) receive no
	representation at all. Yet, under the assumption that price is a
	continuous process sampled at discrete intervals, the price must have
	traversed those nine levels to arrive at
	\$5,060.100~\cite{cont2001,bacry2012}. The gap-filled approach
	addresses this by inserting synthetic trail-datapoints at every
	intermediate unit-level price between consecutive observations,
	thereby constructing a profile that reflects the full path traversed
	by the market rather than only the endpoints of each move.

	\subsection{Why Path-Weighting?}
	\label{sec:whypathweight}

	The term \emph{path-weighted} refers to the fact that each price
	level's histogram count is weighted by the number of times the price
	path crossed that level, not merely the number of times it was
	explicitly quoted. The rationale for this weighting rests on three
	observations:

	\begin{enumerate}[leftmargin=*]
	\item \textbf{Continuity of the price process.} Financial prices
	are fundamentally continuous stochastic processes sampled at
	discrete intervals by the exchange or broker
	feed~\cite{cont2001,bacry2012}. Between any two consecutive
	observations at prices $p_A$ and $p_B$, the underlying price
	process must have traversed every intermediate level. The raw
	profile discards this traversal information; the path-weighted
	profile recovers it.

	\item \textbf{Elimination of empty bins.} In the raw profile,
	fast directional moves produce stretches of price levels with zero
	counts, creating discontinuities in the histogram that can mislead
	visual interpretation. Path-weighting ensures that every price
	level between $p_{\min}$ and $p_{\max}$ receives a non-zero count,
	producing a continuous and visually coherent
	profile~\cite{steidlmayer1986}.

	\item \textbf{Traversal as a proxy for significance.} A price
	level that is crossed repeatedly---even by fast-moving price
	swings that do not dwell there---is a level that the market
	revisits often. Such levels frequently correspond to support,
	resistance, or areas of high liquidity~\cite{dalton2007,
	steidlmayer1986}. Path-weighting captures this repeated-traversal
	signal, which raw unit counting misses entirely.
	\end{enumerate}

	In summary, path-weighting transforms the market profile from a
	histogram of \emph{quoting intensity} into a histogram of
	\emph{traversal frequency}, which is a richer and more informative
	representation of where the market has been.

	\subsection{Algorithm}

	The gap-filling algorithm operates on pairs of consecutive data points. For
	each pair $(A, B)$ with prices $p_A$ and $p_B$ and timestamps $t_A$
	and $t_B$ (represented as nanosecond integers for computational
	efficiency), the algorithm first computes the signed unit difference
	$\Delta n = \operatorname{round}((p_B - p_A) / \delta)$. If
	$\lvert\Delta n\rvert \le 1$, no interpolation is needed because the two prices
	are adjacent or identical, and the pair is left unchanged. If
	$\lvert\Delta n\rvert > 1$, the algorithm inserts $\lvert\Delta n\rvert - 1$ intermediate
	rows. Each intermediate row $k$ (where $1 \le k < \lvert\Delta n\rvert$)
	receives a price of
	$p_A + k \cdot \operatorname{sgn}(\Delta n) \cdot \delta$ and a timestamp of
	$t_A + \frac{k}{\lvert\Delta n\rvert} \cdot (t_B - t_A)$. The timestamp
	interpolation is linear, distributing the intermediate points evenly
	across the time interval between data points $A$ and
	$B$~\cite{dacorogna2001}.

	The implementation is fully vectorised using array operations rather
	than interpreted loops~\cite{numpy2020}. The key operations are
	element repetition (to repeat each source index by the number of units
	in its segment), cumulative summation (to compute segment start
	positions), and element-wise arithmetic for price and timestamp
	interpolation. This vectorised approach processes the entire
	393,252-point dataset in under 2~seconds on a consumer-grade machine.

	The gap-filling is applied independently to the bid series and the ask
	series because the bid and ask prices can move by different amounts on
	the same data update. After gap-filling, the bid series expands from
	393,252 rows to exactly \textbf{4,614,400~rows} (an expansion factor
	of $11.73\times$), and the ask series expands from 393,252 rows to
	exactly \textbf{4,619,918~rows} (an expansion factor of
	$11.75\times$).

	\subsection{Feature Engineering}

	The feature engineering pipeline for the gap-filled approach shares the
	initial stages with the raw approach: data fetching, tabular
	conversion, and datetime derivation are identical. The additional
	stage is the gap-filling itself, which produces two new arrays of
	expanded prices and their corresponding interpolated timestamps.

	For plotting, the expanded nanosecond timestamps must be converted to
	floating-point date numbers. Because the expanded arrays contain
	approximately 4.6 million elements, calling a datetime conversion
	function on individual objects would be prohibitively slow. Instead,
	the conversion is performed arithmetically: the nanosecond integer is
	divided by $10^9$ to get seconds, then by 86,400 to get fractional
	days since the Unix epoch, and finally offset by the appropriate
	constant to align with the plotting library's date
	system~\cite{matplotlib2007}. This bypasses all object-level datetime
	creation and processes the 4.6 million timestamps in a single
	vectorised operation.

	The histogram bins are constructed identically to the raw approach,
	using 0.01-unit (\$0.010) bin widths. Because the gap-filled data has
	the same price range as the raw data (\$4,878.380 to \$5,083.990),
	the number of bins is also 20,562.

	\subsection{Output}

	The output figure has the identical $2 \times 2$ subplot layout as
	Figure~\ref{fig:raw_4panel}.

	\begin{figure*}[t]
	\centering
	\includegraphics[width=\textwidth]{filled_ticks_4panel.png}
	\caption{Gap-filled (path-weighted) Y-distribution histograms
	(left column) and time-series line charts (right column) for bid
	(top, blue) and ask (bottom, red) prices of \texttt{XAUUSDc} on
	February~12, 2026. The bid series contains 4,614,400 data points
	and the ask series contains 4,619,918 data points after
	gap-filling. Bin size = 0.01~unit (\$0.010). The histogram X-axis
	shows the path-weighted count: the number of times each price
	level was traversed between consecutive data points, including synthetic
	intermediate points.}
	\label{fig:filled_4panel}
	\end{figure*}

	\subsection{Interpretation}

	The gap-filled histogram (Figure~\ref{fig:filled_4panel}) answers a
	fundamentally different question than the raw histogram. Where the
	raw profile asks ``how many times was price \emph{quoted} at this
	level,'' the gap-filled profile asks ``how many times did the price
	\emph{path} cross this level.'' The practical consequence is visible
	in the histogram scale: the raw histogram peaks at counts near 120,
	while the gap-filled histogram peaks at counts near 1,200 (consistent
	with the $\approx 11.7\times$ average expansion factor).

	Price regions that were traversed frequently---even if the market did
	not dwell there long enough to generate many raw tick
	updates---accumulate higher counts in the gap-filled profile. The
	large sell-off visible around 16:00~UTC, where the bid price dropped
	from the \$5,050.000 region to the \$4,878.000 region in a
	concentrated burst of activity, produces substantial counts at every
	intermediate price level in the gap-filled profile, whereas those same
	levels appear sparse or empty in the raw profile because the market
	jumped through them in large increments.


	% ════════════════════════════════════════════════════════
	\section{Raw vs.\ Gap-Filled: Comprehensive Comparison}
	\label{sec:comparison}

	\subsection{What Each Approach Measures}

	The raw approach counts only actual tick updates from the broker's
	data feed. When a price level receives a high count, it means the
	market's best bid or ask was actively updated to that level many
	times. This is a direct measurement of \emph{quoting
	intensity}~\cite{ohara1995}: how frequently market participants were
	placing or modifying orders at that price.

	The gap-filled approach counts every tick-level price between
	consecutive updates, including synthetic intermediate points that were
	never explicitly quoted. When a price level receives a high count in
	the gap-filled profile, it means the price \emph{path} crossed that
	level many times---either through actual quoting or through
	interpolation during price jumps. This is a measurement of
	\emph{traversal frequency}.

	\subsection{Detailed Comparison}

	Table~\ref{tab:comparison} presents a comprehensive side-by-side
	comparison of the two approaches across all relevant variables.

	\begin{table*}[t]
	\centering
	\caption{Comprehensive comparison of raw tick vs.\ gap-filled
	(path-weighted) market profile construction for \texttt{XAUUSDc} on
	February~12, 2026.}
	\label{tab:comparison}
	\small
	\begin{tabular}{@{}p{3.8cm}p{5.8cm}p{5.8cm}@{}}
	\toprule
	\textbf{Variable} & \textbf{Raw Tick Profile} & \textbf{Gap-Filled (Path-Weighted) Profile} \\
	\midrule
	Data source &
	Microsecond bid/ask ticks from MT5 &
	Same raw ticks, plus synthetic trail-datapoints \\
	\midrule
	Bid data points &
	393,252 &
	4,614,400 ($11.73\times$ expansion) \\
	\midrule
	Ask data points &
	393,252 &
	4,619,918 ($11.75\times$ expansion) \\
	\midrule
	Price range &
	\$4,878.380 -- \$5,083.990 &
	\$4,878.380 -- \$5,083.990 (identical) \\
	\midrule
	Bin width &
	$\delta = \$0.010$ (1 tick) &
	$\delta = \$0.010$ (1 tick, identical) \\
	\midrule
	Number of bins &
	20,562 &
	20,562 (identical) \\
	\midrule
	Avg.\ count per bin (bid) &
	$393{,}252 / 20{,}537 \approx 19.15$ &
	$4{,}614{,}400 / 20{,}537 \approx 224.7$ \\
	\midrule
	Peak histogram count &
	$\sim$120 &
	$\sim$1,200 \\
	\midrule
	Empty bins in profile &
	Many (fast moves leave gaps) &
	None (all intermediate levels filled) \\
	\midrule
	Profile continuity &
	Discontinuous; sparse in trending regions &
	Continuous; no gaps across entire price range \\
	\midrule
	What is measured &
	Quoting intensity (how often each level was quoted) &
	Traversal frequency (how often price path crossed each level) \\
	\midrule
	Consolidation zones &
	High counts---dense, well-represented &
	Similar to raw (few gaps to fill when moves are small) \\
	\midrule
	Fast directional moves &
	Sparse or empty---underrepresented &
	Well-represented with interpolated traversals \\
	\midrule
	Support/resistance detection &
	Based on quoting density only &
	Enhanced: repeated traversals indicate revisited levels \\
	\midrule
	Interpolation method &
	None &
	Linear timestamp interpolation, tick-step price fill \\
	\midrule
	Computational cost &
	Minimal (direct histogram of raw data) &
	Higher ($\sim$11.7$\times$ more data to process) \\
	\bottomrule
	\end{tabular}
	\end{table*}

	\subsection{Superiority of the Gap-Filled Approach}

	The gap-filled approach produces a fundamentally more representative
	market profile than the raw tick approach. Its advantages are
	threefold:

	\begin{enumerate}[leftmargin=*]
	\item \textbf{Complete price coverage.} The gap-filled profile
	assigns a non-zero count to every price level within the day's
	range, eliminating the misleading empty bins that appear in the
	raw profile during fast moves. This provides a structurally
	complete picture of where the market traded.

	\item \textbf{Traversal information.} By counting path crossings
	rather than only explicit quotes, the gap-filled profile captures
	information about how frequently the market revisited each price
	level---information that is entirely absent from the raw profile.
	This traversal signal is directly relevant to identifying dynamic
	support and resistance~\cite{dalton2007}.

	\item \textbf{Robustness to feed granularity.} Different brokers
	and feed providers update tick data at different rates. A slower
	feed produces larger jumps between consecutive ticks, which
	creates more gaps in the raw profile. The gap-filled approach is
	robust to this variation because it reconstructs the intermediate
	path regardless of the feed's update frequency.
	\end{enumerate}

	The primary trade-off is computational cost: the gap-filling process
	multiplies the dataset by a factor of approximately $11.7\times$ in
	this study, which proportionally increases the time required for
	histogram computation and rendering compared to a typical raw-data
	market profile. For very long time horizons or very volatile
	instruments, this expansion factor could be significantly larger.

	\subsection{Interaction with Bin Size}

	At the 1-tick bin width ($w = 0.010$) used throughout this study,
	the difference between the raw and gap-filled profiles is maximal
	because gaps in the raw profile (empty bins where no tick was
	observed) are filled in by the gap-filling process. As the bin width
	increases, the practical difference between the two approaches
	diminishes because larger bins tend to capture at least some ticks
	even in the raw profile, and the synthetic intermediate points are
	absorbed into the same bins as the observed ticks. At sufficiently
	large bin widths, the raw and gap-filled histograms become nearly
	indistinguishable~\cite{scott1979}.


	% ════════════════════════════════════════════════════════
	\section{Conclusion}
	\label{sec:conclusion}

	This paper presented two approaches to constructing market profiles
	from 393,252 microsecond-resolution bid/ask tick updates of
	\texttt{XAUUSDc} on February~12, 2026, collected from a standard cent
	live trading account on the Exness broker via MetaTrader~5. The raw
	approach counted only observed tick levels, producing a profile that
	reflects quoting intensity. The gap-filled (path-weighted) approach
	interpolated every intermediate price level between consecutive ticks,
	expanding the dataset to 4,614,400 bid rows and 4,619,918 ask rows,
	and producing a profile that reflects path traversal frequency.

	The gap-filled approach yields a more complete and informative market
	profile by eliminating empty bins, capturing traversal information,
	and providing robustness to variations in feed update frequency. The
	primary cost of this approach is computational: the gap-filling
	process multiplies the dataset size by a factor of approximately
	$11.7\times$, which may result in slower calculation times compared to
	typical market profile construction from raw data.

	%% ============================================================================
	%% AUTHOR INFORMATION
	%% ============================================================================
	\newpage
	\vspace{2em}
	\section*{Author Information}
	\label{sec:author_info}

	\begin{center}
	\textbf{Rembrant Oyangoren Albeos}~\href{https://orcid.org/0009-0006-8743-4419}{\includegraphics[height=10pt]{ORCID_icon.png}}
	\end{center}

	\noindent\textbf{ORCID:} \url{https://orcid.org/0009-0006-8743-4419}

	\noindent\textbf{Email:} algorembrant@gmail.com

	\noindent\textbf{Affiliation:} Developer \& Researcher at ConQ

	\noindent\textbf{Organization:} Continual Quasars~\includegraphics[height=7pt]{ContinualQuasars_icon.png}

	\noindent\textbf{Organization GitHub:} \url{https://github.com/ContinualQuasars}

	\noindent\textbf{This Version:} February 14, 2026

	\noindent\textbf{GitHub:} \url{https://github.com/ContinualQuasars/mBA-Profile}


	% ════════════════════════════════════════════════════════
	\newpage
	\vspace{20pt}
	\begin{thebibliography}{99}

	\bibitem{steidlmayer1986}
	J.~P. Steidlmayer and K.~Koy,
	\textit{Markets and Market Logic},
	Porcupine Press, 1986.

	\bibitem{dalton2007}
	J.~Dalton, E.~Jones, and R.~Dalton,
	\textit{Mind Over Markets: Power Trading with Market Generated
	Information}, updated ed., Wiley, 2013.

	\bibitem{mt5docs}
	MetaQuotes Software Corp.,
	``MetaTrader~5 Python Integration,''
	\url{https://www.mql5.com/en/docs/python_metatrader5}, 2024.

	\bibitem{metaquotes2024}
	MetaQuotes Software Corp.,
	``MetaTrader~5 Trading Platform,''
	\url{https://www.metatrader5.com}, 2024.

	\bibitem{ohara1995}
	M.~O'Hara,
	\textit{Market Microstructure Theory},
	Blackwell Publishers, 1995.

	\bibitem{hasbrouck2007}
	J.~Hasbrouck,
	\textit{Empirical Market Microstructure: The Institutions, Economics,
	and Econometrics of Securities Trading},
	Oxford University Press, 2007.

	\bibitem{cont2001}
	R.~Cont,
	``Empirical properties of asset returns: Stylized facts and
	statistical issues,''
	\textit{Quantitative Finance}, vol.~1, no.~2, pp.~223--236, 2001.

	\bibitem{bacry2012}
	E.~Bacry, M.~Mastromatteo, and J.-F. Muzy,
	``Hawkes processes in finance,''
	\textit{Market Microstructure and Liquidity}, vol.~1, no.~1, 2015.

	\bibitem{dacorogna2001}
	M.~M. Dacorogna, R.~Gen\c{c}ay, U.~A. M\"{u}ller, R.~B. Olsen, and
	O.~V. Pictet,
	\textit{An Introduction to High-Frequency Finance},
	Academic Press, 2001.

	\bibitem{engle2000}
	R.~F. Engle and J.~R. Russell,
	``Autoregressive conditional duration: A new model for irregularly
	spaced transaction data,''
	\textit{Econometrica}, vol.~66, no.~5, pp.~1127--1162, 1998.

	\bibitem{ane2000}
	T.~An\'{e} and H.~Geman,
	``Order flow, transaction clock, and normality of asset returns,''
	\textit{The Journal of Finance}, vol.~55, no.~5, pp.~2259--2284,
	2000.

	\bibitem{goldberg1991}
	D.~Goldberg,
	``What every computer scientist should know about floating-point
	arithmetic,''
	\textit{ACM Computing Surveys}, vol.~23, no.~1, pp.~5--48, 1991.

	\bibitem{numpy2020}
	C.~R. Harris \textit{et al.},
	``Array programming with NumPy,''
	\textit{Nature}, vol.~585, pp.~357--362, 2020.

	\bibitem{matplotlib2007}
	J.~D. Hunter,
	``Matplotlib: A 2D graphics environment,''
	\textit{Computing in Science \& Engineering}, vol.~9, no.~3,
	pp.~90--95, 2007.

	\bibitem{cmegroup2024}
	CME Group,
	``Gold Futures Contract Specifications,''
	\url{https://www.cmegroup.com/markets/metals/precious/gold.contractSpecs.html},
	2024.

	\bibitem{scott1979}
	D.~W. Scott,
	``On optimal and data-based histograms,''
	\textit{Biometrika}, vol.~66, no.~3, pp.~605--610, 1979.

	\end{thebibliography}



	\end{document}