| """
|
| MAC (MBM AI Cloud) β Comprehensive Project Report
|
| Generates a professional Word document covering the entire platform.
|
|
|
| Run: python docs/generate_final_report.py
|
| Output: docs/MAC-Final-Project-Report.docx
|
| """
|
|
|
| import matplotlib
|
| matplotlib.use('Agg')
|
| import matplotlib.pyplot as plt
|
| import matplotlib.patches as mpatches
|
| from matplotlib.patches import FancyBboxPatch
|
| import numpy as np
|
| from docx import Document
|
| from docx.shared import Inches, Pt, Cm, RGBColor, Emu
|
| from docx.enum.text import WD_ALIGN_PARAGRAPH
|
| from docx.enum.table import WD_ALIGN_VERTICAL
|
| from docx.oxml import OxmlElement
|
| from docx.oxml.ns import qn
|
| from io import BytesIO
|
| from pathlib import Path
|
| import textwrap
|
|
|
| OUT = Path(__file__).parent
|
| plt.rcParams['font.family'] = 'sans-serif'
|
| plt.rcParams['font.sans-serif'] = ['Segoe UI', 'Arial', 'Helvetica', 'DejaVu Sans']
|
|
|
|
|
| C_DARK = '#0d1b2a'
|
| C_PRIMARY = '#1b4965'
|
| C_SEC = '#2a6f97'
|
| C_ACCENT = '#468faf'
|
| C_LIGHT = '#a9d6e5'
|
| C_PALE = '#e8f4f8'
|
| C_ORANGE = '#e76f51'
|
| C_GREEN = '#52b788'
|
| C_TEAL = '#2ec4b2'
|
| C_AMBER = '#f4a261'
|
| C_RED = '#e63946'
|
| C_GRAY = '#6c757d'
|
| C_LGRAY = '#f6f8fa'
|
|
|
|
|
|
|
|
|
|
|
| def _shade(cell, color_hex):
|
| shading = OxmlElement('w:shd')
|
| shading.set(qn('w:fill'), color_hex.lstrip('#'))
|
| shading.set(qn('w:val'), 'clear')
|
| cell._element.get_or_add_tcPr().append(shading)
|
|
|
| def _set_col_widths(table, widths_inches):
|
| for row in table.rows:
|
| for i, w in enumerate(widths_inches):
|
| if i < len(row.cells):
|
| row.cells[i].width = Inches(w)
|
|
|
| def _set_cell_border(cell, **kwargs):
|
| tc = cell._element
|
| tcPr = tc.get_or_add_tcPr()
|
| borders = OxmlElement('w:tcBorders')
|
| for edge, val in kwargs.items():
|
| el = OxmlElement(f'w:{edge}')
|
| el.set(qn('w:val'), val.get('val', 'single'))
|
| el.set(qn('w:sz'), val.get('sz', '4'))
|
| el.set(qn('w:space'), '0')
|
| el.set(qn('w:color'), val.get('color', '000000'))
|
| borders.append(el)
|
| tcPr.append(borders)
|
|
|
| def new_doc():
|
| doc = Document()
|
| style = doc.styles['Normal']
|
| font = style.font
|
| font.name = 'Calibri'
|
| font.size = Pt(11)
|
| for sec in doc.sections:
|
| sec.top_margin = Cm(2.54)
|
| sec.bottom_margin = Cm(2.54)
|
| sec.left_margin = Cm(2.54)
|
| sec.right_margin = Cm(2.54)
|
| return doc
|
|
|
| def body(doc, text, bold=False, italic=False, size=11, justify=True):
|
| p = doc.add_paragraph()
|
| r = p.add_run(text)
|
| r.font.size = Pt(size)
|
| r.font.name = 'Calibri'
|
| r.bold = bold
|
| r.italic = italic
|
| p.paragraph_format.space_after = Pt(6)
|
| if justify:
|
| p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
|
| return p
|
|
|
| def heading(doc, text, level=1):
|
| h = doc.add_heading(text, level=level)
|
| for run in h.runs:
|
| run.font.color.rgb = RGBColor(0x1b, 0x49, 0x65)
|
| return h
|
|
|
| def styled_table(doc, headers, rows, col_widths=None):
|
| t = doc.add_table(rows=1 + len(rows), cols=len(headers))
|
| t.style = 'Table Grid'
|
| t.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
| for i, h in enumerate(headers):
|
| c = t.rows[0].cells[i]
|
| c.text = ''
|
| run = c.paragraphs[0].add_run(h)
|
| run.bold = True
|
| run.font.size = Pt(9)
|
| run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
|
| run.font.name = 'Calibri'
|
| c.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.LEFT
|
| _shade(c, C_PRIMARY)
|
| for ri, row in enumerate(rows):
|
| for ci, val in enumerate(row):
|
| c = t.rows[ri + 1].cells[ci]
|
| c.text = ''
|
| run = c.paragraphs[0].add_run(str(val))
|
| run.font.size = Pt(9)
|
| run.font.name = 'Calibri'
|
| if ri % 2 == 1:
|
| _shade(c, C_LGRAY)
|
| if col_widths:
|
| _set_col_widths(t, col_widths)
|
| doc.add_paragraph()
|
| return t
|
|
|
| def code_block(doc, text, size=8):
|
| p = doc.add_paragraph()
|
| p.paragraph_format.space_before = Pt(4)
|
| p.paragraph_format.space_after = Pt(8)
|
| r = p.add_run(text)
|
| r.font.size = Pt(size)
|
| r.font.name = 'Consolas'
|
| return p
|
|
|
| def add_diagram(doc, buf, width=6.2):
|
| doc.add_picture(buf, width=Inches(width))
|
| last = doc.paragraphs[-1]
|
| last.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
| doc.add_paragraph()
|
|
|
| def bullet(doc, text, bold_prefix=None, size=11):
|
| p = doc.add_paragraph(style='List Bullet')
|
| if bold_prefix:
|
| r1 = p.add_run(bold_prefix)
|
| r1.bold = True
|
| r1.font.size = Pt(size)
|
| r1.font.name = 'Calibri'
|
| r2 = p.add_run(text)
|
| r2.font.size = Pt(size)
|
| r2.font.name = 'Calibri'
|
| else:
|
| r = p.add_run(text)
|
| r.font.size = Pt(size)
|
| r.font.name = 'Calibri'
|
| p.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
|
| return p
|
|
|
|
|
|
|
|
|
|
|
| def _box(ax, x, y, w, h, label, color=C_PRIMARY, tc='white', fs=9, fw='bold', ls=None):
|
| b = FancyBboxPatch((x, y), w, h, boxstyle="round,pad=0.03",
|
| facecolor=color, edgecolor='#dddddd', linewidth=0.6)
|
| ax.add_patch(b)
|
| lines = label.split('\n')
|
| if len(lines) == 1:
|
| ax.text(x + w/2, y + h/2, label, ha='center', va='center',
|
| fontsize=fs, color=tc, fontweight=fw)
|
| else:
|
| top = lines[0]
|
| bot = '\n'.join(lines[1:])
|
| ax.text(x + w/2, y + h*0.62, top, ha='center', va='center',
|
| fontsize=fs, color=tc, fontweight=fw)
|
| ax.text(x + w/2, y + h*0.30, bot, ha='center', va='center',
|
| fontsize=fs - 1.5, color=tc, fontweight='normal', alpha=0.85)
|
|
|
| def _arrow(ax, x1, y1, x2, y2, color='#555555', lw=1.5, style='->'):
|
| ax.annotate('', xy=(x2, y2), xytext=(x1, y1),
|
| arrowprops=dict(arrowstyle=style, color=color, lw=lw))
|
|
|
| def _fig(w=10, h=6, xl=(0, 10), yl=(0, 6)):
|
| fig, ax = plt.subplots(figsize=(w, h))
|
| ax.set_xlim(xl)
|
| ax.set_ylim(yl)
|
| ax.axis('off')
|
| fig.patch.set_facecolor('white')
|
| return fig, ax
|
|
|
| def _to_buf(fig):
|
| buf = BytesIO()
|
| fig.savefig(buf, format='png', dpi=200, bbox_inches='tight',
|
| facecolor='white', edgecolor='none')
|
| plt.close(fig)
|
| buf.seek(0)
|
| return buf
|
|
|
|
|
|
|
|
|
|
|
| def dia_architecture():
|
| fig, ax = _fig(11, 9, (0, 11), (0, 9))
|
| ax.text(5.5, 8.6, 'MAC β System Architecture', ha='center', fontsize=14,
|
| fontweight='bold', color=C_DARK)
|
|
|
|
|
| _box(ax, 2.5, 7.5, 6, 0.7, 'Students / Faculty β College LAN (PWA Frontend)', C_TEAL, 'white', 10)
|
| _arrow(ax, 5.5, 7.5, 5.5, 7.0)
|
|
|
|
|
| _box(ax, 3, 6.2, 5, 0.7, 'Nginx β Reverse Proxy Β· TLS Β· SSE Streaming', C_ORANGE, 'white', 10)
|
| _arrow(ax, 5.5, 6.2, 5.5, 5.7)
|
|
|
|
|
| _box(ax, 1.2, 4.9, 8.6, 0.7, 'FastAPI Gateway β Auth Β· Rate Limiting Β· Guardrails Β· Routing Β· 50+ Endpoints', C_PRIMARY, 'white', 10)
|
|
|
|
|
| _arrow(ax, 2.5, 4.9, 1.5, 4.3)
|
| _arrow(ax, 5.5, 4.9, 5.5, 4.3)
|
| _arrow(ax, 8.5, 4.9, 9.5, 4.3)
|
|
|
|
|
| _box(ax, 0.2, 3.5, 2.6, 0.7, 'PostgreSQL / SQLite\nUsers Β· Logs Β· Quotas', C_SEC, 'white', 8)
|
| _box(ax, 4, 3.5, 3, 0.7, 'LiteLLM Proxy\nRouting Β· Load Balance', C_SEC, 'white', 8)
|
| _box(ax, 8.2, 3.5, 2.6, 0.7, 'Qdrant Β· Redis\nVectors Β· Cache', C_SEC, 'white', 8)
|
|
|
| _arrow(ax, 5.5, 3.5, 5.5, 3.0)
|
|
|
|
|
| _box(ax, 2.5, 2.2, 6, 0.7, 'vLLM / Ollama β GPU Inference Engine', C_DARK, 'white', 10)
|
|
|
|
|
| _box(ax, 1.0, 0.8, 2.2, 0.6, 'GPU Node 1\nRTX 3060 12GB', C_ACCENT, 'white', 7)
|
| _box(ax, 4.4, 0.8, 2.2, 0.6, 'GPU Node 2', C_ACCENT, 'white', 8)
|
| _box(ax, 7.8, 0.8, 2.2, 0.6, 'GPU Node N', C_ACCENT, 'white', 8)
|
| ax.text(6.95, 1.05, '...', fontsize=16, color=C_GRAY, ha='center')
|
| _arrow(ax, 3.5, 2.2, 2.1, 1.45)
|
| _arrow(ax, 5.5, 2.2, 5.5, 1.45)
|
| _arrow(ax, 7.5, 2.2, 8.9, 1.45)
|
|
|
|
|
| _box(ax, 0.2, 2.2, 2, 0.6, 'SearXNG\nWeb Search', C_AMBER, 'white', 8)
|
| _arrow(ax, 1.2, 4.9, 1.2, 2.85, color=C_AMBER, lw=1)
|
|
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_roadmap():
|
| fig, ax = _fig(12, 5.5, (0, 12), (0, 5.5))
|
| phases = [
|
| ('Phase 1', 'API Endpoints'),
|
| ('Phase 2', 'LLM Models'),
|
| ('Phase 3', 'APIβModel\nIntegration'),
|
| ('Phase 4', 'Usage Control'),
|
| ('Phase 5', 'Web Interface'),
|
| ('Phase 6', 'Guardrails'),
|
| ('Phase 7', 'Knowledge\nBase + RAG'),
|
| ('Phase 8', 'Retrieval\n+ Search'),
|
| ]
|
| colors = ['#0d1b2a', '#1b3a4b', '#1b4965', '#2a6f97',
|
| '#468faf', '#61a5c2', '#52b788', '#e76f51']
|
| tcs = ['white'] * 8
|
| xs = [0.3, 3.1, 5.9, 8.7]
|
| bw, bh = 2.4, 1.1
|
| for i, (label, sub) in enumerate(phases):
|
| row = 0 if i < 4 else 1
|
| col = i % 4
|
| x = xs[col]
|
| y = 3.5 if row == 0 else 1.5
|
| _box(ax, x, y, bw, bh, f'{label}\n{sub}', colors[i], tcs[i], fs=10)
|
| for i in range(3):
|
| _arrow(ax, xs[i] + bw, 3.5 + bh/2, xs[i+1], 3.5 + bh/2)
|
| ax.annotate('', xy=(xs[0] + bw/2, 1.5 + bh), xytext=(xs[3] + bw/2, 3.5),
|
| arrowprops=dict(arrowstyle='->', color='#555', lw=1.5,
|
| connectionstyle='arc3,rad=0.4'))
|
| for i in range(3):
|
| _arrow(ax, xs[i] + bw, 1.5 + bh/2, xs[i+1], 1.5 + bh/2)
|
| ax.text(6, 5.1, 'MAC β 8-Phase Build Roadmap', ha='center', fontsize=14,
|
| fontweight='bold', color=C_DARK)
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_auth_flow():
|
| fig, ax = _fig(12, 5, (0, 12), (0, 5))
|
| ax.text(6, 4.7, 'Unified Authentication Flow', ha='center', fontsize=13,
|
| fontweight='bold', color=C_DARK)
|
| steps = [
|
| (0.1, 'Student\nOpens PWA', C_TEAL),
|
| (2.0, 'Enter\nRoll No.\n+ DOB', C_ACCENT),
|
| (3.9, 'POST\n/auth/verify', C_SEC),
|
| (5.8, 'Check\nRegistry\n(DB Match)', C_PRIMARY),
|
| (7.7, 'Create User\nor Return\nExisting', C_GREEN),
|
| (9.8, 'JWT Tokens\n+ Profile', C_DARK),
|
| ]
|
| bw, bh = 1.7, 1.2
|
| y = 2.0
|
| for x, label, color in steps:
|
| _box(ax, x, y, bw, bh, label, color, 'white', 8)
|
| for i in range(len(steps) - 1):
|
| _arrow(ax, steps[i][0] + bw, y + bh/2, steps[i+1][0], y + bh/2)
|
|
|
|
|
| ax.text(10, 1.3, 'First login?\nmust_change_password=true', fontsize=7, color=C_AMBER,
|
| ha='center', fontstyle='italic')
|
| _arrow(ax, 10.65, 2.0, 10.65, 1.55, color=C_AMBER, lw=1.2)
|
| _box(ax, 9.4, 0.3, 2.5, 0.7, 'Set Password\nPOST /auth/set-password', C_AMBER, 'white', 7)
|
|
|
|
|
| ax.text(6.65, 1.4, 'DOB mismatch β 401', fontsize=7, color=C_RED,
|
| ha='center', fontweight='bold')
|
| _arrow(ax, 6.65, 2.0, 6.65, 1.6, color=C_RED, lw=1)
|
|
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_module_structure():
|
| fig, ax = _fig(12, 8, (0, 12), (0, 8))
|
| ax.text(6, 7.7, 'MAC β Module Architecture', ha='center', fontsize=14,
|
| fontweight='bold', color=C_DARK)
|
|
|
|
|
| _box(ax, 0.3, 5.8, 11.4, 1.5, '', C_PALE, C_DARK, 9)
|
| ax.text(6, 7.0, 'Routers (API Layer) β 11 Modules', fontsize=10, fontweight='bold',
|
| color=C_PRIMARY, ha='center')
|
| routers = ['auth', 'explore', 'query', 'usage', 'models', 'keys',
|
| 'quota', 'guardrails', 'rag', 'search', 'integration']
|
| for i, r in enumerate(routers):
|
| col = i % 6
|
| row = i // 6
|
| x = 0.6 + col * 1.9
|
| y = 6.35 - row * 0.55
|
| _box(ax, x, y, 1.7, 0.45, r, C_PRIMARY, 'white', 7)
|
|
|
|
|
| _box(ax, 0.3, 3.8, 11.4, 1.5, '', '#f0f4f8', C_DARK, 9)
|
| ax.text(6, 5.0, 'Services (Business Logic) β 7 Modules', fontsize=10, fontweight='bold',
|
| color=C_SEC, ha='center')
|
| services = ['auth_service', 'llm_service', 'model_service', 'guardrail_service',
|
| 'rag_service', 'search_service', 'usage_service']
|
| for i, s in enumerate(services):
|
| x = 0.5 + i * 1.6
|
| _box(ax, x, 4.1, 1.45, 0.5, s.replace('_', '\n'), C_SEC, 'white', 6.5)
|
|
|
|
|
| _box(ax, 0.3, 2.5, 5.2, 0.9, '', '#fff3e0', C_DARK, 9)
|
| ax.text(2.9, 3.15, 'Middleware', fontsize=10, fontweight='bold', color=C_ORANGE, ha='center')
|
| _box(ax, 0.6, 2.65, 2.2, 0.5, 'Auth\nMiddleware', C_ORANGE, 'white', 7)
|
| _box(ax, 3.1, 2.65, 2.2, 0.5, 'Rate\nLimiter', C_ORANGE, 'white', 7)
|
|
|
|
|
| _box(ax, 6.0, 2.5, 5.7, 0.9, '', '#e8f5e9', C_DARK, 9)
|
| ax.text(8.85, 3.15, 'Database Models (ORM)', fontsize=10, fontweight='bold', color=C_GREEN, ha='center')
|
| orm = ['User', 'StudentRegistry', 'UsageLog', 'GuardrailRule', 'QuotaOverride', 'RAG*']
|
| for i, m in enumerate(orm):
|
| x = 6.2 + i * 0.9
|
| _box(ax, x, 2.65, 0.8, 0.5, m, C_GREEN, 'white', 6)
|
|
|
|
|
| _box(ax, 0.3, 1.2, 5.2, 0.9, '', '#e3f2fd', C_DARK, 9)
|
| ax.text(2.9, 1.85, 'Pydantic Schemas β 11 Modules', fontsize=9, fontweight='bold',
|
| color=C_ACCENT, ha='center')
|
| ax.text(2.9, 1.45, 'auth Β· chat Β· explore Β· guardrails Β· integration\n'
|
| 'keys Β· models Β· quota Β· rag Β· search Β· usage',
|
| fontsize=7, color=C_DARK, ha='center', linespacing=1.4)
|
|
|
|
|
| _box(ax, 6.0, 1.2, 5.7, 0.9, '', '#fce4ec', C_DARK, 9)
|
| ax.text(8.85, 1.85, 'Utils & Config', fontsize=10, fontweight='bold', color=C_RED, ha='center')
|
| ax.text(8.85, 1.45, 'security.py Β· config.py Β· database.py\n'
|
| 'JWT Β· bcrypt Β· SQLAlchemy async engine',
|
| fontsize=7, color=C_DARK, ha='center', linespacing=1.4)
|
|
|
|
|
| _box(ax, 0.3, 0.1, 11.4, 0.7, '', '#e0e0e0', C_DARK, 9)
|
| ax.text(6, 0.45, 'External: PostgreSQL/SQLite Β· Redis Β· Qdrant Β· SearXNG Β· vLLM/Ollama Β· LiteLLM Β· Nginx',
|
| fontsize=9, fontweight='bold', color=C_DARK, ha='center')
|
|
|
|
|
| _arrow(ax, 6, 5.8, 6, 5.35, color=C_GRAY, lw=1.5)
|
| _arrow(ax, 2.9, 3.8, 2.9, 3.45, color=C_GRAY, lw=1.5)
|
| _arrow(ax, 8.85, 3.8, 8.85, 3.45, color=C_GRAY, lw=1.5)
|
|
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_guardrail_pipeline():
|
| fig, ax = _fig(12, 5, (0, 12), (0, 5))
|
| ax.text(6, 4.7, 'Guardrails β Input & Output Filtering Pipeline', ha='center',
|
| fontsize=13, fontweight='bold', color=C_DARK)
|
| y = 2.0
|
| bw, bh = 1.8, 0.9
|
| pipeline = [
|
| (0.1, 'User\nInput', C_TEAL),
|
| (2.1, 'Prompt\nInjection\nDetect', C_RED),
|
| (4.1, 'Harmful\nContent\nFilter', C_ORANGE),
|
| (6.1, 'LLM\nInference', C_PRIMARY),
|
| (8.1, 'PII\nRedaction\nFilter', C_AMBER),
|
| (10.1, 'Safe\nResponse', C_GREEN),
|
| ]
|
| for x, label, color in pipeline:
|
| _box(ax, x, y, bw, bh, label, color, 'white', 8)
|
| for i in range(len(pipeline) - 1):
|
| _arrow(ax, pipeline[i][0] + bw, y + bh/2, pipeline[i+1][0], y + bh/2)
|
| checks_in = ['Jailbreak / override attempts', 'Academic dishonesty keywords', 'Max prompt length (32K chars)']
|
| for i, txt in enumerate(checks_in):
|
| ax.text(2.1, 1.5 - i*0.35, f'β’ {txt}', fontsize=7.5, color=C_DARK)
|
| ax.text(2.1, 1.5 - 3*0.35 + 0.1, 'INPUT CHECKS', fontsize=7, color=C_RED, fontweight='bold')
|
| checks_out = ['Email redaction', 'Phone number redaction', 'ID/card number masking']
|
| for i, txt in enumerate(checks_out):
|
| ax.text(8.1, 1.5 - i*0.35, f'β’ {txt}', fontsize=7.5, color=C_DARK)
|
| ax.text(8.1, 1.5 - 3*0.35 + 0.1, 'OUTPUT CHECKS', fontsize=7, color=C_AMBER, fontweight='bold')
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_rag_pipeline():
|
| fig, ax = _fig(12, 7, (0, 12), (0, 7))
|
| ax.text(6, 6.7, 'RAG Pipeline β Ingestion & Query', ha='center',
|
| fontsize=13, fontweight='bold', color=C_DARK)
|
| ax.text(1, 5.9, 'Document Ingestion', fontsize=10, fontweight='bold', color=C_SEC)
|
| ing = [
|
| (0.2, 'Upload\nPDF/DOCX/TXT', C_TEAL),
|
| (2.5, 'Chunking\n512 tokens\noverlap 50', C_SEC),
|
| (4.8, 'Embedding\n768-dim vectors', C_ACCENT),
|
| (7.1, 'Store in\nQdrant', C_PRIMARY),
|
| ]
|
| bw, bh = 2, 0.8
|
| y = 4.8
|
| for x, label, color in ing:
|
| _box(ax, x, y, bw, bh, label, color, 'white', 9)
|
| for i in range(len(ing) - 1):
|
| _arrow(ax, ing[i][0] + bw, y + bh/2, ing[i+1][0], y + bh/2)
|
| ax.plot([0.5, 11.5], [4.3, 4.3], color='#ddd', lw=1, ls='--')
|
| ax.text(1, 4.0, 'Query & Retrieval', fontsize=10, fontweight='bold', color=C_SEC)
|
| q_top = [
|
| (0.2, 'Student\nQuestion', C_TEAL),
|
| (2.5, 'Embed\nQuery', C_SEC),
|
| (4.8, 'Similarity\nSearch (top-k)', C_ACCENT),
|
| ]
|
| y2 = 3.0
|
| for x, label, color in q_top:
|
| _box(ax, x, y2, bw, bh, label, color, 'white', 9)
|
| for i in range(len(q_top) - 1):
|
| _arrow(ax, q_top[i][0] + bw, y2 + bh/2, q_top[i+1][0], y2 + bh/2)
|
| _arrow(ax, 6.8, y2 + bh/2, 7.1, y2 + bh/2)
|
| _box(ax, 7.1, 2.5, 2.5, 1.3, 'LLM\nRetrieved Chunks\n+ Question', C_PRIMARY, 'white', 9)
|
| _arrow(ax, 9.6, 3.15, 10, 3.15)
|
| _box(ax, 9.7, 1.4, 2, 0.8, 'Answer with\nCitations', C_GREEN, 'white', 9)
|
| ax.annotate('', xy=(5.8, 3.8), xytext=(8.1, 4.8),
|
| arrowprops=dict(arrowstyle='->', color=C_AMBER, lw=1.5, ls='--'))
|
| ax.text(7.6, 4.35, 'vector index', fontsize=7, color=C_AMBER, fontstyle='italic', rotation=-30)
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_request_pipeline():
|
| fig, ax = _fig(12, 4.5, (0, 12), (0, 4.5))
|
| ax.text(6, 4.1, 'Request Processing Pipeline', ha='center', fontsize=13,
|
| fontweight='bold', color=C_DARK)
|
| steps = [
|
| (0.1, 'Incoming\nRequest', C_TEAL),
|
| (2.0, 'JWT / API\nKey Verify', C_SEC),
|
| (3.9, 'Rate\nLimit\nCheck', C_ACCENT),
|
| (5.8, 'Token\nQuota\nCheck', C_PRIMARY),
|
| (7.7, 'Guardrail\nInput\nFilter', C_ORANGE),
|
| (9.8, 'LLM\nInference', C_GREEN),
|
| ]
|
| bw, bh = 1.7, 1.1
|
| y = 1.5
|
| for x, label, color in steps:
|
| _box(ax, x, y, bw, bh, label, color, 'white', 8)
|
| for i in range(len(steps) - 1):
|
| _arrow(ax, steps[i][0] + bw, y + bh/2, steps[i+1][0], y + bh/2)
|
| for i, label in [(1, '401'), (2, '429'), (3, '429'), (4, '400')]:
|
| ax.annotate('', xy=(steps[i][0] + bw/2, y - 0.15),
|
| xytext=(steps[i][0] + bw/2, y),
|
| arrowprops=dict(arrowstyle='->', color=C_RED, lw=1.2))
|
| ax.text(steps[i][0] + bw/2, y - 0.4, f'Reject {label}', ha='center',
|
| fontsize=7, color=C_RED, fontweight='bold')
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_db_schema():
|
| fig, ax = _fig(12, 7, (0, 12), (0, 7))
|
| ax.text(6, 6.7, 'Database Schema (Entity-Relationship)', ha='center',
|
| fontsize=13, fontweight='bold', color=C_DARK)
|
|
|
| tables = [
|
| (0.2, 4.0, 'student_registry', ['id (PK)', 'roll_number (UQ)', 'name', 'department', 'dob', 'batch_year']),
|
| (4.2, 4.0, 'users', ['id (PK)', 'roll_number (UQ)', 'name', 'email', 'department',
|
| 'role', 'password_hash', 'must_change_password',
|
| 'is_active', 'api_key (UQ)', 'created_at']),
|
| (8.5, 4.8, 'refresh_tokens', ['id (PK)', 'user_id (FK)', 'token_hash', 'expires_at', 'revoked']),
|
| (8.5, 2.6, 'usage_logs', ['id (PK)', 'user_id (FK)', 'model', 'endpoint',
|
| 'tokens_in', 'tokens_out', 'latency_ms']),
|
| (0.2, 1.0, 'quota_overrides', ['id (PK)', 'user_id (FK, UQ)', 'daily_tokens',
|
| 'requests_per_hour', 'reason']),
|
| (4.2, 1.0, 'guardrail_rules', ['id (PK)', 'category', 'action', 'pattern',
|
| 'enabled', 'priority']),
|
| (8.5, 0.5, 'rag_collections\n+ rag_documents', ['collection: id, name, description',
|
| 'document: id, collection_id (FK)',
|
| 'title, chunk_count, status']),
|
| ]
|
| for x, y_base, title, fields in tables:
|
| w = 3.8 if 'users' in title else 3.6 if 'rag_' in title else 3.2
|
| h = 0.18 * len(fields) + 0.5
|
| _box(ax, x, y_base, w, h, '', '#f0f4f8', C_DARK, 9)
|
| ax.text(x + w/2, y_base + h - 0.2, title, fontsize=8, fontweight='bold',
|
| color=C_PRIMARY, ha='center')
|
| for i, f in enumerate(fields):
|
| ax.text(x + 0.15, y_base + h - 0.42 - i*0.18, f, fontsize=6, color=C_DARK,
|
| family='monospace')
|
|
|
|
|
| _arrow(ax, 3.4, 5.0, 4.2, 5.0, color=C_SEC, lw=1.5)
|
| _arrow(ax, 8.0, 5.2, 8.5, 5.2, color=C_SEC, lw=1.5)
|
| _arrow(ax, 8.0, 4.5, 8.5, 3.6, color=C_SEC, lw=1.5)
|
| _arrow(ax, 5.5, 4.0, 1.8, 2.2, color=C_SEC, lw=1)
|
|
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_deployment():
|
| fig, ax = _fig(11, 6, (0, 11), (0, 6))
|
| ax.text(5.5, 5.7, 'Docker Compose β Deployment Architecture', ha='center',
|
| fontsize=13, fontweight='bold', color=C_DARK)
|
| services = [
|
| (0.3, 3.5, 'nginx\n:80/:443'),
|
| (2.5, 3.5, 'mac\n(FastAPI)\n:8000'),
|
| (4.7, 3.5, 'postgres\n:5432'),
|
| (6.9, 3.5, 'redis\n:6379'),
|
| (0.3, 1.5, 'qdrant\n:6333'),
|
| (2.5, 1.5, 'searxng\n:8080'),
|
| (4.7, 1.5, 'litellm\n:4000'),
|
| (6.9, 1.5, 'vLLM/Ollama\n:11434'),
|
| ]
|
| colors = [C_ORANGE, C_PRIMARY, C_SEC, C_AMBER, C_ACCENT, C_TEAL, C_GREEN, C_DARK]
|
| bw, bh = 2.0, 1.3
|
| for (x, y, label), color in zip(services, colors):
|
| _box(ax, x, y, bw, bh, label, color, 'white', 8)
|
|
|
|
|
| _box(ax, 9.2, 2.5, 1.6, 2.3, 'mac-net\nbridge\nnetwork', '#e0e0e0', C_DARK, 7)
|
|
|
|
|
| _arrow(ax, 2.3, 4.15, 2.5, 4.15, color=C_GRAY, lw=1.5)
|
|
|
| _arrow(ax, 4.5, 4.15, 4.7, 4.15, color=C_GRAY, lw=1.2)
|
| _arrow(ax, 4.5, 3.7, 6.9, 3.7, color=C_GRAY, lw=1.2)
|
|
|
| _arrow(ax, 3.5, 3.5, 1.3, 2.8, color=C_GRAY, lw=1)
|
| _arrow(ax, 3.5, 3.5, 3.5, 2.8, color=C_GRAY, lw=1)
|
| _arrow(ax, 3.5, 3.5, 5.7, 2.8, color=C_GRAY, lw=1)
|
|
|
| _arrow(ax, 6.7, 2.15, 6.9, 2.15, color=C_GRAY, lw=1.2)
|
|
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_frontend_flow():
|
| fig, ax = _fig(12, 5, (0, 12), (0, 5))
|
| ax.text(6, 4.7, 'Frontend β User Journey Flow', ha='center', fontsize=13,
|
| fontweight='bold', color=C_DARK)
|
|
|
| pages = [
|
| (0.2, 2.5, 'Auth Page\nRoll No + DOB', C_TEAL),
|
| (2.3, 2.5, 'Set Password\n(first login)', C_AMBER),
|
| (4.4, 2.5, 'Dashboard\nStats Β· Models', C_PRIMARY),
|
| (6.5, 2.5, 'Chat\nStreaming AI', C_SEC),
|
| (8.6, 2.5, 'Settings\nProfile Β· Key', C_ACCENT),
|
| (10.0, 1.0, 'Admin Panel\nUsers Β· Registry', C_ORANGE),
|
| ]
|
| bw, bh = 1.9, 1.2
|
| for x, y, label, color in pages:
|
| _box(ax, x, y, bw, bh, label, color, 'white', 8)
|
|
|
| _arrow(ax, 0.2 + bw, 3.1, 2.3, 3.1)
|
|
|
| _arrow(ax, 2.3 + bw, 3.1, 4.4, 3.1)
|
|
|
| _arrow(ax, 4.4 + bw, 3.1, 6.5, 3.1)
|
|
|
| _arrow(ax, 4.4 + bw, 2.8, 8.6, 2.8, lw=1)
|
|
|
| _arrow(ax, 5.4, 2.5, 10.9, 1.0 + bh, color=C_ORANGE, lw=1)
|
| ax.text(8.0, 1.8, 'admin only', fontsize=7, color=C_ORANGE, fontstyle='italic')
|
|
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_search_flow():
|
| fig, ax = _fig(12, 4.5, (0, 12), (0, 4.5))
|
| ax.text(6, 4.2, 'Grounded Search Pipeline', ha='center', fontsize=13,
|
| fontweight='bold', color=C_DARK)
|
| steps = [
|
| (0.1, 'User\nQuery', C_TEAL),
|
| (2.3, 'SearXNG\nMeta-Search', C_ORANGE),
|
| (4.5, 'Top Results\nSnippets', C_AMBER),
|
| (6.7, 'LLM + Context\nGenerate Answer', C_PRIMARY),
|
| (9.2, 'Grounded\nResponse\n+ Citations', C_GREEN),
|
| ]
|
| bw, bh = 2, 1
|
| y = 1.5
|
| for x, label, color in steps:
|
| _box(ax, x, y, bw, bh, label, color, 'white', 9)
|
| for i in range(len(steps) - 1):
|
| _arrow(ax, steps[i][0] + bw, y + bh/2, steps[i+1][0], y + bh/2)
|
| _box(ax, 4.5, 0.2, 2, 0.6, 'Result Cache\n(Redis)', '#e8e8e8', C_DARK, 7, 'normal')
|
| _arrow(ax, 5.5, 1.5, 5.5, 0.85, color=C_GRAY, lw=1, style='<->')
|
| return _to_buf(fig)
|
|
|
|
|
| def dia_test_coverage():
|
| fig, ax = plt.subplots(figsize=(9, 4.5))
|
| modules = ['Auth', 'Query', 'Models', 'Explore', 'Usage', 'Keys',
|
| 'Quota', 'Guards', 'RAG', 'Search', 'Integration']
|
| tests = [15, 12, 8, 6, 8, 6, 5, 7, 8, 5, 6]
|
| colors_list = [C_PRIMARY, C_SEC, C_ACCENT, C_TEAL, C_GREEN,
|
| C_AMBER, C_ORANGE, C_RED, C_DARK, C_SEC, C_ACCENT]
|
| bars = ax.barh(modules, tests, color=colors_list, height=0.55, edgecolor='white', linewidth=0.5)
|
| for bar, v in zip(bars, tests):
|
| ax.text(bar.get_width() + 0.2, bar.get_y() + bar.get_height()/2,
|
| f'{v}', va='center', fontsize=10, fontweight='bold', color=C_DARK)
|
| ax.set_xlabel('Number of Test Cases', fontsize=10, color=C_DARK)
|
| ax.set_title('Test Coverage Across Modules (81+ tests)', fontsize=13,
|
| fontweight='bold', color=C_DARK, pad=12)
|
| ax.set_xlim(0, 20)
|
| ax.spines['top'].set_visible(False)
|
| ax.spines['right'].set_visible(False)
|
| ax.spines['left'].set_visible(False)
|
| ax.tick_params(left=False, labelsize=9)
|
| ax.grid(axis='x', alpha=0.2)
|
| fig.tight_layout()
|
| return _to_buf(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
| def generate_report():
|
| doc = new_doc()
|
|
|
|
|
| for _ in range(5):
|
| doc.add_paragraph()
|
|
|
| p = doc.add_paragraph()
|
| p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
| r = p.add_run('MAC')
|
| r.font.size = Pt(52)
|
| r.bold = True
|
| r.font.color.rgb = RGBColor(0x1b, 0x49, 0x65)
|
|
|
| p = doc.add_paragraph()
|
| p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
| r = p.add_run('MBM AI Cloud')
|
| r.font.size = Pt(22)
|
| r.font.color.rgb = RGBColor(0x2a, 0x6f, 0x97)
|
|
|
| doc.add_paragraph()
|
|
|
| p = doc.add_paragraph()
|
| p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
| r = p.add_run('Comprehensive Project Report')
|
| r.font.size = Pt(18)
|
| r.bold = True
|
| r.font.color.rgb = RGBColor(0x0d, 0x1b, 0x2a)
|
|
|
| p = doc.add_paragraph()
|
| p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
| r = p.add_run('Self-Hosted AI Inference Platform for MBM Engineering College')
|
| r.font.size = Pt(12)
|
| r.font.color.rgb = RGBColor(0x6c, 0x75, 0x7d)
|
|
|
| for _ in range(4):
|
| doc.add_paragraph()
|
|
|
| info_lines = [
|
| ('Developed by:', 'Prof. Abhishek Gaur'),
|
| ('Department:', 'Computer Science & Engineering'),
|
| ('Institution:', 'MBM University, Jodhpur'),
|
| ('Date:', '08 April 2026'),
|
| ]
|
| for label, value in info_lines:
|
| p = doc.add_paragraph()
|
| p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
| r1 = p.add_run(label + ' ')
|
| r1.font.size = Pt(11)
|
| r1.font.color.rgb = RGBColor(0x6c, 0x75, 0x7d)
|
| r2 = p.add_run(value)
|
| r2.font.size = Pt(11)
|
| r2.bold = True
|
| r2.font.color.rgb = RGBColor(0x0d, 0x1b, 0x2a)
|
|
|
| doc.add_page_break()
|
|
|
|
|
| heading(doc, 'Table of Contents', level=1)
|
|
|
| toc_items = [
|
| ('1.', 'Executive Summary'),
|
| ('2.', 'Introduction & Motivation'),
|
| ('3.', 'System Architecture'),
|
| ('4.', 'Technology Stack'),
|
| ('5.', 'Module Breakdown'),
|
| (' 5.1', 'Authentication Module'),
|
| (' 5.2', 'Explore & Discovery Module'),
|
| (' 5.3', 'Query & Inference Module'),
|
| (' 5.4', 'Usage Tracking Module'),
|
| (' 5.5', 'Model Management Module'),
|
| (' 5.6', 'API Key Management Module'),
|
| (' 5.7', 'Quota & Rate Limiting Module'),
|
| (' 5.8', 'Guardrails Module'),
|
| (' 5.9', 'RAG (Retrieval-Augmented Generation) Module'),
|
| (' 5.10', 'Search & Grounded Answers Module'),
|
| (' 5.11', 'Integration & Worker Management Module'),
|
| ('6.', 'Database Design'),
|
| ('7.', 'Frontend β Progressive Web App'),
|
| ('8.', 'Security Architecture'),
|
| ('9.', 'Deployment & Infrastructure'),
|
| ('10.', 'Testing Strategy'),
|
| ('11.', 'API Endpoint Reference'),
|
| ('12.', 'Project Structure'),
|
| ('13.', 'Build Roadmap & Phase Summary'),
|
| ('14.', 'Future Scope'),
|
| ('15.', 'Conclusion'),
|
| ]
|
| for num, title_text in toc_items:
|
| p = doc.add_paragraph()
|
| p.paragraph_format.space_after = Pt(2)
|
| r1 = p.add_run(num + ' ')
|
| r1.font.size = Pt(11)
|
| r1.bold = True
|
| r2 = p.add_run(title_text)
|
| r2.font.size = Pt(11)
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '1. Executive Summary', level=1)
|
|
|
| body(doc, 'MAC (MBM AI Cloud) is a fully self-hosted, zero-cloud AI inference platform purpose-built for '
|
| 'MBM Engineering College, Jodhpur. The platform provides students and faculty with on-premise access to '
|
| 'state-of-the-art large language models β for code generation, mathematical reasoning, general text tasks, '
|
| 'image understanding, and speech transcription β through a clean, well-documented REST API and a modern '
|
| 'Progressive Web App (PWA) frontend.')
|
|
|
| body(doc, 'The system is designed around a core principle: no student data ever leaves the college network, '
|
| 'and no cloud API subscription is required. All inference runs on college-owned GPU hardware. The platform '
|
| 'starts on a single PC and scales horizontally to 30+ nodes with zero code changes β only new vLLM worker '
|
| 'containers are registered.')
|
|
|
| heading(doc, 'Key Highlights', level=2)
|
| highlights = [
|
| ('50+ REST API endpoints', ' spanning 11 modules β auth, explore, query, usage, models, keys, quota, guardrails, RAG, search, and integration.'),
|
| ('OpenAI-compatible API', ' β students use the official OpenAI Python SDK with only a base_url change.'),
|
| ('Unified DOB-based authentication', ' β students verify using their Registration Number and Date of Birth from the college registry; no separate signup needed.'),
|
| ('Content guardrails', ' β input checks for prompt injection, harmful content, academic dishonesty; output checks for PII redaction.'),
|
| ('RAG pipeline', ' β document ingestion, chunking, vector embedding via Qdrant, and retrieval-augmented generation.'),
|
| ('Grounded web search', ' β SearXNG meta-search integrated with LLM for cited, factual answers.'),
|
| ('PWA frontend', ' β installable app with dashboard, multi-session chat (SSE streaming), settings, and admin panel.'),
|
| ('81+ automated tests', ' across all 11 modules with pytest-asyncio β 100% passing.'),
|
| ('Docker Compose', ' one-command deployment with 7 containerised services.'),
|
| ]
|
| for bold_part, rest in highlights:
|
| bullet(doc, rest, bold_prefix=bold_part)
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '2. Introduction & Motivation', level=1)
|
|
|
| body(doc, 'The rapid advancement of Large Language Models (LLMs) has created a fundamental shift in how software is '
|
| 'developed, studied, and taught. Students at engineering colleges increasingly rely on AI tools for code generation, '
|
| 'debugging, mathematical reasoning, and research assistance. However, commercial AI services such as OpenAI GPT-4, '
|
| 'Google Gemini, and Anthropic Claude impose significant per-token costs that are prohibitive for educational institutions '
|
| 'serving hundreds of students.')
|
|
|
| body(doc, 'MBM Engineering College possesses substantial computing resources in its laboratories β specifically, '
|
| 'NVIDIA RTX 3060 (12 GB VRAM) GPUs across multiple lab PCs. These machines sit idle after lab hours and during '
|
| 'weekends. MAC repurposes this existing hardware to serve state-of-the-art open-source AI models, eliminating '
|
| 'the need for any cloud subscriptions.')
|
|
|
| heading(doc, 'Problem Statement', level=2)
|
| body(doc, 'Design and implement a self-hosted AI inference platform that provides MBM students and faculty with '
|
| 'free, private, and high-performance access to multiple AI models through a standardised API interface, '
|
| 'with built-in authentication, usage control, content safety guardrails, and knowledge retrieval capabilities.')
|
|
|
| heading(doc, 'Objectives', level=2)
|
| objectives = [
|
| 'Provide zero-cost AI model access to all students on the college LAN.',
|
| 'Ensure complete data privacy β all queries and responses stay within the college network.',
|
| 'Support multiple model types: text generation, code assistance, mathematical reasoning, image understanding, and speech transcription.',
|
| 'Expose an OpenAI-compatible API so existing SDKs and tutorials work without modification.',
|
| 'Implement robust authentication, rate limiting, and content safety guardrails.',
|
| 'Build a Progressive Web App for students who prefer a ChatGPT-like interface.',
|
| 'Enable Retrieval-Augmented Generation (RAG) over college textbooks and lecture notes.',
|
| 'Design for horizontal scalability β start on one PC, expand to N PCs with no code changes.',
|
| ]
|
| for obj in objectives:
|
| bullet(doc, obj)
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '3. System Architecture', level=1)
|
|
|
| body(doc, 'The MAC platform follows a layered microservices architecture. Requests flow from the client through '
|
| 'a reverse proxy, into the API gateway (which handles authentication, rate limiting, and routing), then to '
|
| 'the appropriate service β either the LLM inference engine, the vector database for RAG, the web search engine, '
|
| 'or the relational database for user/usage data.')
|
|
|
| add_diagram(doc, dia_architecture())
|
|
|
| body(doc, 'Figure 1: Complete system architecture showing the request flow from the student client through Nginx, '
|
| 'the FastAPI gateway, and into the backend services.', italic=True, size=10, justify=False)
|
|
|
| heading(doc, 'Request Flow', level=2)
|
| body(doc, 'A typical chat request follows this path:')
|
| steps = [
|
| 'The student opens the PWA or sends an API request from their code.',
|
| 'Nginx receives the request, terminates TLS, and forwards it to the FastAPI gateway.',
|
| 'The FastAPI middleware validates the JWT token or API key.',
|
| 'The rate limiter checks hourly request count and daily token quota.',
|
| 'Input guardrails scan the prompt for injection attempts and harmful content.',
|
| 'The query router forwards the request to LiteLLM, which routes to the appropriate vLLM worker.',
|
| 'vLLM performs GPU-accelerated inference and returns the response.',
|
| 'Output guardrails scan the response for PII and harmful content.',
|
| 'Usage is logged to the database and the response is returned to the student.',
|
| ]
|
| for i, step in enumerate(steps, 1):
|
| bullet(doc, f'{step}')
|
|
|
| add_diagram(doc, dia_request_pipeline())
|
| body(doc, 'Figure 2: Request processing pipeline showing validation stages and rejection points.', italic=True, size=10, justify=False)
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '4. Technology Stack', level=1)
|
|
|
| body(doc, 'Each technology was evaluated against alternatives before selection. The guiding criteria were: '
|
| 'open-source licensing, self-hostability, async performance, and community support.')
|
|
|
| styled_table(doc, ['Layer', 'Technology', 'Version', 'Rationale'],
|
| [
|
| ['API Framework', 'FastAPI', '0.115.6', 'Async, auto-generated OpenAPI docs, type-safe with Pydantic'],
|
| ['Runtime', 'Python', '3.11+', 'Rich AI/ML ecosystem, asyncio support, broad library availability'],
|
| ['ASGI Server', 'Uvicorn', '0.34.0', 'High-performance, supports HTTP/1.1 and WebSockets'],
|
| ['Database (Prod)', 'PostgreSQL 16', '-', 'ACID-compliant, excellent JSON support, concurrent writes'],
|
| ['Database (Dev)', 'SQLite + aiosqlite', '0.20.0', 'Zero-config for development, file-based, async support'],
|
| ['ORM', 'SQLAlchemy', '2.0.36', 'Async engine, mature, supports PostgreSQL and SQLite'],
|
| ['Cache', 'Redis 7', '-', 'In-memory, sub-ms latency, rate limiting primitives'],
|
| ['LLM Inference', 'vLLM / Ollama', '-', 'PagedAttention, continuous batching, OpenAI-compatible API'],
|
| ['Model Router', 'LiteLLM', '-', 'Unified proxy, load balancing, health checks, fallback chains'],
|
| ['Vector DB', 'Qdrant', '-', 'Purpose-built for embeddings, fast similarity search'],
|
| ['Web Search', 'SearXNG', '-', 'Self-hosted meta-search, no API keys required'],
|
| ['Reverse Proxy', 'Nginx', '-', 'TLS termination, SSE streaming, request buffering'],
|
| ['Auth Tokens', 'python-jose (JWT)', '-', 'HS256 signing, stateless authentication'],
|
| ['Password Hashing', 'bcrypt', '4.2.1', 'Industry standard, async-wrapped to avoid blocking'],
|
| ['HTTP Client', 'httpx', '0.28.1', 'Async, streaming support, connection pooling'],
|
| ['SSE', 'sse-starlette', '2.2.1', 'Server-Sent Events for streaming LLM responses'],
|
| ['Containers', 'Docker + Compose', '-', 'Reproducible deployment, one-command startup'],
|
| ['Frontend', 'Vanilla JS PWA', '-', 'Zero build step, installable, offline-capable'],
|
| ['Charts', 'Chart.js', '4.4.7', 'Lightweight, responsive donut charts for dashboard'],
|
| ['Testing', 'pytest + pytest-asyncio', '-', 'Async test support, fixtures, parameterisation'],
|
| ], col_widths=[1.3, 1.5, 0.6, 3.2])
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '5. Module Breakdown', level=1)
|
|
|
| body(doc, 'The MAC codebase is organised into 11 router modules, 7 service modules, 2 middleware modules, '
|
| '4 ORM model files, and 11 Pydantic schema files. Each module has a clear single responsibility.')
|
|
|
| add_diagram(doc, dia_module_structure())
|
| body(doc, 'Figure 3: Complete module architecture showing routers, services, middleware, models, and schemas.', italic=True, size=10, justify=False)
|
|
|
|
|
| heading(doc, '5.1 Authentication Module', level=2)
|
| body(doc, 'The authentication module is the most complex, handling the complete user lifecycle from first-time '
|
| 'DOB verification through to admin user management.')
|
|
|
| heading(doc, 'Unified Auth Flow', level=3)
|
| body(doc, 'Unlike traditional systems that separate login and signup, MAC uses a single unified flow. Students '
|
| 'enter their Registration Number and Date of Birth (in DDMMYYYY format). The system verifies these against '
|
| 'the pre-loaded Student Registry in the database. If the student has not logged in before, an account is '
|
| 'automatically created with must_change_password set to true, redirecting them to set a password. On subsequent '
|
| 'visits, the same flow authenticates them.')
|
|
|
| add_diagram(doc, dia_auth_flow())
|
| body(doc, 'Figure 4: Unified authentication flow β from DOB verification to token issuance.', italic=True, size=10, justify=False)
|
|
|
| heading(doc, 'Security Features', level=3)
|
| security_features = [
|
| ('Password hashing:', ' bcrypt with work factor 12. All bcrypt calls are wrapped with asyncio.to_thread() to prevent blocking the async event loop.'),
|
| ('JWT tokens:', ' HS256-signed access tokens with 24-hour expiry. Refresh tokens are random 32-byte secrets stored as SHA-256 hashes.'),
|
| ('Account lockout:', ' After 5 failed login attempts, the account is locked for 15 minutes.'),
|
| ('API keys:', ' Each user receives a unique api_key (mac_sk_live_*) for programmatic access. Keys are stored in the database.'),
|
| ('Role-based access:', ' Three roles β student, faculty, admin β with escalating privileges.'),
|
| ]
|
| for bold_part, rest in security_features:
|
| bullet(doc, rest, bold_prefix=bold_part)
|
|
|
| heading(doc, 'Admin Endpoints', level=3)
|
| body(doc, 'Administrators can manage users, the student registry, and view platform statistics through 10 dedicated '
|
| 'admin endpoints under /auth/admin/*:')
|
| styled_table(doc, ['Endpoint', 'Description'],
|
| [
|
| ['GET /auth/admin/users', 'List all registered users with roles and status'],
|
| ['POST /auth/admin/users', 'Create a new user account'],
|
| ['PUT /auth/admin/users/{id}/role', 'Change user role (student/faculty/admin)'],
|
| ['PUT /auth/admin/users/{id}/status', 'Activate or deactivate a user account'],
|
| ['DELETE /auth/admin/users/{id}', 'Permanently delete a user'],
|
| ['POST /auth/admin/users/{id}/reset-password', 'Reset user password to a temporary value'],
|
| ['POST /auth/admin/users/{id}/regenerate-key', 'Regenerate a user\'s API key'],
|
| ['GET /auth/admin/registry', 'List all entries in the student registry'],
|
| ['POST /auth/admin/registry', 'Add a new student to the registry'],
|
| ['POST /auth/admin/registry/bulk', 'Bulk import students from CSV/JSON data'],
|
| ['GET /auth/admin/stats', 'Dashboard statistics (users, requests, tokens)'],
|
| ], col_widths=[3.0, 3.6])
|
|
|
|
|
| heading(doc, '5.2 Explore & Discovery Module', level=2)
|
| body(doc, 'The explore module provides read-only discovery endpoints that require no authentication. Students '
|
| 'can see what models are available, what capabilities the platform offers, search models by tag, list all '
|
| 'API endpoints, and check platform health β all before writing any code.')
|
|
|
| styled_table(doc, ['Endpoint', 'Description'],
|
| [
|
| ['GET /explore/models', 'List all deployed models with status, capabilities, and context length'],
|
| ['GET /explore/models/search?tag=code', 'Filter models by capability tag (code, math, vision, chat)'],
|
| ['GET /explore/models/{model_id}', 'Detailed model info including benchmarks and example prompts'],
|
| ['GET /explore/endpoints', 'Auto-generated list of all 50+ API endpoints in the platform'],
|
| ['GET /explore/health', 'Platform health: uptime, models loaded, version number'],
|
| ], col_widths=[3.2, 3.4])
|
|
|
|
|
| heading(doc, '5.3 Query & Inference Module', level=2)
|
| body(doc, 'The query module is the core inference API. It supports chat completions, text completions, embeddings, '
|
| 're-ranking, vision analysis, and speech-to-text. All endpoints are OpenAI-compatible β students can use the '
|
| 'official OpenAI SDK by simply changing the base_url.')
|
|
|
| heading(doc, 'Smart Routing', level=3)
|
| body(doc, 'When the model parameter is set to "auto", the system analyses the request content and routes to the '
|
| 'optimal model:')
|
| styled_table(doc, ['Content Signal', 'Routed To'],
|
| [
|
| ['Code keywords (function, debug, class, compile, etc.)', 'qwen2.5-coder:7b'],
|
| ['Math/reasoning (solve, prove, equation, step-by-step)', 'deepseek-r1:8b'],
|
| ['Image attachment in request', 'llava:7b'],
|
| ['General text, Q&A, summarisation', 'qwen2.5:14b'],
|
| ])
|
|
|
| heading(doc, 'Streaming', level=3)
|
| body(doc, 'When stream=true is set in the request, the response is delivered as Server-Sent Events (SSE). Each '
|
| 'event contains a delta with a partial token, enabling real-time display in the frontend. The Nginx '
|
| 'configuration has proxy_buffering disabled to support SSE passthrough.')
|
|
|
| heading(doc, 'Endpoints', level=3)
|
| styled_table(doc, ['Endpoint', 'Content-Type', 'Description'],
|
| [
|
| ['POST /query/chat', 'application/json', 'Chat completion β multi-turn, streaming/non-streaming'],
|
| ['POST /query/completions', 'application/json', 'Raw text completion'],
|
| ['POST /query/embeddings', 'application/json', 'Text β 768-dim vector embeddings'],
|
| ['POST /query/rerank', 'application/json', 'Re-rank passages by cosine similarity'],
|
| ['POST /query/vision', 'multipart/form-data', 'Image + text β answer'],
|
| ['POST /query/speech-to-text', 'multipart/form-data', 'Audio β transcribed text'],
|
| ], col_widths=[2.2, 1.8, 2.6])
|
|
|
|
|
| heading(doc, '5.4 Usage Tracking Module', level=2)
|
| body(doc, 'Every API call is logged with the model used, token counts (input and output separately), latency in '
|
| 'milliseconds, status code, and a unique request ID. Students can track their own consumption, and administrators '
|
| 'can monitor the entire platform.')
|
|
|
| styled_table(doc, ['Endpoint', 'Auth', 'Description'],
|
| [
|
| ['GET /usage/me', 'Any user', 'Token usage β today, this week, this month, by model'],
|
| ['GET /usage/me/history', 'Any user', 'Paginated request history with timestamps'],
|
| ['GET /usage/me/quota', 'Any user', 'Current quota limits and remaining balance'],
|
| ['GET /usage/admin/all', 'Admin', 'All users\' usage summary'],
|
| ['GET /usage/admin/user/{roll}', 'Admin', 'Specific student\'s detailed usage'],
|
| ['GET /usage/admin/models', 'Admin', 'Per-model usage stats across the platform'],
|
| ], col_widths=[2.5, 0.8, 3.3])
|
|
|
|
|
| heading(doc, '5.5 Model Management Module', level=2)
|
| body(doc, 'This module provides model lifecycle management β listing available models, loading/unloading models '
|
| 'from GPU memory, checking model health, and downloading new models from Ollama.')
|
|
|
| styled_table(doc, ['Model', 'Parameters', 'Specialty', 'VRAM'],
|
| [
|
| ['Qwen2.5-Coder 7B', '7B', 'Code generation, debugging, completion', '~5 GB'],
|
| ['DeepSeek-R1 8B', '8B', 'Mathematics, step-by-step reasoning', '~6 GB'],
|
| ['LLaVA 1.6 7B', '7B', 'Image understanding, visual Q&A', '~8 GB'],
|
| ['Qwen2.5 14B', '14B', 'General text, summarisation, Q&A', '~10 GB'],
|
| ['Whisper Large v3', '1.5B', 'Speech-to-text transcription', '~3 GB'],
|
| ])
|
|
|
|
|
| heading(doc, '5.6 API Key Management Module', level=2)
|
| body(doc, 'Every user receives a unique API key (format: mac_sk_live_<hex>) upon account creation. The module '
|
| 'handles key generation, revocation, usage statistics, and admin-level key management. API keys serve as an '
|
| 'alternative authentication method to JWT tokens, particularly useful for programmatic access from scripts and notebooks.')
|
|
|
| styled_table(doc, ['Endpoint', 'Description'],
|
| [
|
| ['GET /keys/my-key', 'Get current API key (partially masked) and metadata'],
|
| ['POST /keys/generate', 'Generate new API key (invalidates old one)'],
|
| ['GET /keys/my-key/stats', 'Token consumption against this key'],
|
| ['DELETE /keys/my-key', 'Revoke current key permanently'],
|
| ['GET /keys/admin/all', 'List all student API keys and status (admin)'],
|
| ['POST /keys/admin/revoke', 'Force-revoke a student\'s key (admin)'],
|
| ], col_widths=[2.5, 4.1])
|
|
|
|
|
| heading(doc, '5.7 Quota & Rate Limiting Module', level=2)
|
| body(doc, 'Rate limits prevent any single user from monopolising GPU resources. The system applies a sliding-window '
|
| 'algorithm for request rate limiting and a daily token counter for quota enforcement. Administrators can set '
|
| 'per-user quota overrides.')
|
|
|
| styled_table(doc, ['Role', 'Daily Token Limit', 'Requests/Hour', 'Max Tokens/Request'],
|
| [
|
| ['Student', '50,000', '100', '4,096'],
|
| ['Faculty', '200,000', '500', '8,192'],
|
| ['Admin', 'Unlimited', 'Unlimited', '16,384'],
|
| ])
|
|
|
| body(doc, 'Rate limit information is returned in every API response via custom headers: X-RateLimit-Limit, '
|
| 'X-RateLimit-Remaining, X-RateLimit-Reset, X-TokenLimit-Limit, and X-TokenLimit-Remaining.')
|
|
|
|
|
| heading(doc, '5.8 Guardrails Module', level=2)
|
| body(doc, 'The guardrails module provides content filtering for both inputs and outputs. It is the safety layer '
|
| 'that protects against prompt injection, harmful content generation, and inadvertent PII exposure.')
|
|
|
| add_diagram(doc, dia_guardrail_pipeline())
|
| body(doc, 'Figure 5: Input and output guardrail pipeline with check categories.', italic=True, size=10, justify=False)
|
|
|
| heading(doc, 'Input Checks', level=3)
|
| styled_table(doc, ['Category', 'What It Detects', 'Action'],
|
| [
|
| ['Prompt Injection', 'Override attempts (ignore instructions, you are now, etc.)', 'Block'],
|
| ['Harmful Content', 'Violence, weapons, drugs, explicit content keywords', 'Block'],
|
| ['Academic Dishonesty', 'Exact exam answers, plagiarism requests', 'Flag + warn'],
|
| ['Max Length', 'Prompts exceeding 32,000 characters', 'Truncate'],
|
| ])
|
|
|
| heading(doc, 'Output Checks (PII Redaction)', level=3)
|
| styled_table(doc, ['Pattern', 'Example', 'Action'],
|
| [
|
| ['Email addresses', 'user@example.com β [EMAIL REDACTED]', 'Redact'],
|
| ['Indian phone numbers', '+91-98765-43210 β [PHONE REDACTED]', 'Redact'],
|
| ['ID/card numbers', '12-digit numbers β [ID REDACTED]', 'Redact'],
|
| ])
|
|
|
| body(doc, 'Built-in patterns are always active. Administrators can add custom regex patterns via the '
|
| 'PUT /guardrails/rules endpoint, which are stored in the database and applied alongside the defaults.')
|
|
|
|
|
| heading(doc, '5.9 RAG (Retrieval-Augmented Generation) Module', level=2)
|
| body(doc, 'The RAG module enables students and faculty to upload documents (textbooks, lecture notes, research papers) '
|
| 'and query them using natural language. The system chunks documents, generates vector embeddings, stores them in '
|
| 'Qdrant, and retrieves the most relevant passages when answering questions.')
|
|
|
| add_diagram(doc, dia_rag_pipeline())
|
| body(doc, 'Figure 6: RAG pipeline showing document ingestion (top) and query-time retrieval (bottom).', italic=True, size=10, justify=False)
|
|
|
| heading(doc, 'Document Ingestion', level=3)
|
| steps_rag = [
|
| 'User uploads a document via POST /rag/ingest (multipart/form-data).',
|
| 'The text is extracted and split into chunks of ~512 tokens with 50-token overlap at word boundaries.',
|
| 'Each chunk is passed through the embedding model to generate a 768-dimensional vector.',
|
| 'Vectors are stored in Qdrant under the specified collection with metadata (document ID, chunk index).',
|
| 'The document record is saved in PostgreSQL with chunk_count and status.',
|
| ]
|
| for step in steps_rag:
|
| bullet(doc, step)
|
|
|
| heading(doc, 'Query Flow', level=3)
|
| body(doc, 'When a student queries via POST /rag/query, the question is embedded, Qdrant performs a similarity '
|
| 'search to find the top-k most relevant chunks, and these chunks are injected into the LLM prompt as context. '
|
| 'The LLM generates an answer grounded in the retrieved content, with source citations included.')
|
|
|
|
|
| heading(doc, '5.10 Search & Grounded Answers Module', level=2)
|
| body(doc, 'This module integrates a self-hosted SearXNG meta-search engine with the LLM to provide web-grounded, '
|
| 'cited answers. Unlike RAG (which searches local documents), this module searches the open web.')
|
|
|
| add_diagram(doc, dia_search_flow())
|
| body(doc, 'Figure 7: Grounded search pipeline β from query to cited answer.', italic=True, size=10, justify=False)
|
|
|
| styled_table(doc, ['Endpoint', 'Description'],
|
| [
|
| ['POST /search/web', 'Web search via SearXNG (returns top results with snippets)'],
|
| ['POST /search/wikipedia', 'Wikipedia article search'],
|
| ['POST /search/grounded', 'Web search + LLM-generated answer with inline citations'],
|
| ['GET /search/cache', 'List cached search results (1-hour in-memory cache)'],
|
| ], col_widths=[2.5, 4.1])
|
|
|
|
|
| heading(doc, '5.11 Integration & Worker Management Module', level=2)
|
| body(doc, 'The integration module manages the connection between the FastAPI gateway and the LLM inference workers. '
|
| 'It exposes smart routing rules, worker node status, and the inference queue.')
|
|
|
| styled_table(doc, ['Endpoint', 'Description'],
|
| [
|
| ['GET /integration/routing-rules', 'View current smart routing configuration'],
|
| ['PUT /integration/routing-rules', 'Update routing rules (admin)'],
|
| ['GET /integration/workers', 'List all worker nodes with status'],
|
| ['GET /integration/workers/{node_id}', 'Detailed worker info (GPU temp, VRAM, models loaded)'],
|
| ['POST /integration/workers/{node_id}/drain', 'Drain a worker for maintenance (admin)'],
|
| ['GET /integration/queue', 'Current inference queue depth and wait times'],
|
| ], col_widths=[3.2, 3.4])
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '6. Database Design', level=1)
|
|
|
| body(doc, 'The relational schema is implemented using SQLAlchemy 2.0 with async support. In development, SQLite '
|
| 'is used (via aiosqlite) for zero-configuration setup; in production, PostgreSQL 16 is used (via asyncpg). '
|
| 'All primary keys use UUIDs to prevent sequential ID enumeration attacks.')
|
|
|
| add_diagram(doc, dia_db_schema())
|
| body(doc, 'Figure 8: Entity-Relationship diagram showing all database tables and their relationships.', italic=True, size=10, justify=False)
|
|
|
| heading(doc, 'Table Descriptions', level=2)
|
|
|
| styled_table(doc, ['Table', 'Records', 'Key Columns', 'Purpose'],
|
| [
|
| ['student_registry', 'Pre-loaded', 'roll_number, name, dob, department, batch_year', 'College student records for DOB verification'],
|
| ['users', 'Dynamic', 'roll_number, email, role, password_hash, api_key, must_change_password', 'Registered platform users with auth credentials'],
|
| ['refresh_tokens', 'Dynamic', 'user_id, token_hash, expires_at, revoked', 'JWT refresh tokens for session continuity'],
|
| ['usage_logs', 'Append-only', 'user_id, model, tokens_in, tokens_out, latency_ms', 'Per-request consumption tracking'],
|
| ['quota_overrides', 'Admin-set', 'user_id, daily_tokens, requests_per_hour', 'Per-user quota overrides beyond role defaults'],
|
| ['guardrail_rules', 'Admin-set', 'category, action, pattern, enabled, priority', 'Custom content filter rules'],
|
| ['rag_collections', 'Dynamic', 'name, description, document_count', 'Named document collections for RAG'],
|
| ['rag_documents', 'Dynamic', 'collection_id, title, chunk_count, status', 'Uploaded documents with processing status'],
|
| ], col_widths=[1.3, 0.8, 2.8, 1.7])
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '7. Frontend β Progressive Web App', level=1)
|
|
|
| body(doc, 'The MAC frontend is a single-page Progressive Web App (PWA) built with vanilla JavaScript, HTML, and CSS β '
|
| 'no build step, no Node.js, no React or Angular dependency. The entire frontend fits in four files: index.html, '
|
| 'app.js (~750 lines), style.css, and manifest.json. It is served directly by the FastAPI static file mount.')
|
|
|
| heading(doc, 'Key Features', level=2)
|
| frontend_features = [
|
| ('Installable PWA:', ' Users can install the app to their desktop or phone via the browser\'s "Install" prompt. A service worker (sw.js) enables offline caching.'),
|
| ('Single-page routing:', ' Navigation is handled via hash-based routing (#dashboard, #chat, #settings, #admin) with no page reloads.'),
|
| ('Unified auth page:', ' A single form for Registration Number + Date of Birth replaces traditional login/signup flows.'),
|
| ('Multi-session chat:', ' Students can create multiple chat sessions, each stored in localStorage. Messages stream in real-time via SSE.'),
|
| ('Dashboard:', ' Displays welcome card, today\'s tokens, requests per hour, chat session count, two donut charts (token usage and request breakdown), model status list, and recent activity table.'),
|
| ('Admin panel:', ' Three-tab interface β Overview (stats), Users (full CRUD table), Student Registry (list/add/bulk import).'),
|
| ('Settings:', ' Profile editing (name, email), password change with eye-toggle visibility, API key display and copy.'),
|
| ('XSS protection:', ' All user-generated content is HTML-escaped via the esc() utility function before DOM injection.'),
|
| ]
|
| for bold_part, rest in frontend_features:
|
| bullet(doc, rest, bold_prefix=bold_part)
|
|
|
| add_diagram(doc, dia_frontend_flow())
|
| body(doc, 'Figure 9: Frontend user journey β from authentication to dashboard, chat, settings, and admin.', italic=True, size=10, justify=False)
|
|
|
| heading(doc, 'Design', level=2)
|
| body(doc, 'The frontend uses a black-and-white professional theme with CSS custom properties for colours. '
|
| 'The "MAC" branding features a glitch animation effect. The layout is fully responsive with breakpoints '
|
| 'at 768px and 600px for tablet and mobile devices.')
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '8. Security Architecture', level=1)
|
|
|
| body(doc, 'Security is implemented at every layer of the stack, following the OWASP Top 10 guidelines:')
|
|
|
| styled_table(doc, ['Threat / Concern', 'Mitigation'],
|
| [
|
| ['Password storage', 'bcrypt (work factor 12), async-wrapped to prevent event loop blocking'],
|
| ['JWT token theft', 'Short expiry (24h), refresh tokens stored as SHA-256 hashes, revocable'],
|
| ['API key exposure', 'Keys shown only once at creation; masked in all subsequent reads'],
|
| ['Brute force login', 'Account lockout after 5 failed attempts for 15 minutes'],
|
| ['SQL injection', 'SQLAlchemy ORM with parameterised queries throughout'],
|
| ['XSS (frontend)', 'esc() function HTML-escapes all dynamic content before DOM insertion'],
|
| ['CSRF', 'API uses stateless JWT; no cookies for auth. CORS restricted to frontend origin'],
|
| ['Prompt injection', 'Input guardrails detect and block override/jailbreak patterns'],
|
| ['PII leakage', 'Output guardrails redact emails, phone numbers, and ID numbers'],
|
| ['Rate abuse', 'Sliding-window rate limiter per user + daily token quota enforcement'],
|
| ['Transport security', 'Nginx terminates TLS; internal traffic on Docker bridge network'],
|
| ['Role escalation', 'require_admin dependency on all admin endpoints; role checked from JWT'],
|
| ['Excessive payload', 'Nginx enforces 25 MB upload limit; Pydantic validates all request schemas'],
|
| ], col_widths=[1.8, 4.8])
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '9. Deployment & Infrastructure', level=1)
|
|
|
| body(doc, 'The entire platform is containerised with Docker and orchestrated with Docker Compose. A single '
|
| 'docker-compose up command starts all 7 services on any machine with Docker installed.')
|
|
|
| add_diagram(doc, dia_deployment())
|
| body(doc, 'Figure 10: Docker Compose deployment showing all 7 services and the bridge network.', italic=True, size=10, justify=False)
|
|
|
| heading(doc, 'Service Inventory', level=2)
|
| styled_table(doc, ['Service', 'Image', 'Port', 'Purpose'],
|
| [
|
| ['mac', 'Custom (Dockerfile)', '8000', 'FastAPI API gateway'],
|
| ['postgres', 'postgres:16-alpine', '5432', 'Relational database'],
|
| ['redis', 'redis:7-alpine', '6379', 'Cache and rate limiting'],
|
| ['nginx', 'nginx:alpine', '80/443', 'Reverse proxy and TLS'],
|
| ['qdrant', 'qdrant/qdrant', '6333', 'Vector database for RAG'],
|
| ['searxng', 'searxng/searxng', '8080', 'Self-hosted web search'],
|
| ['litellm', 'ghcr.io/berriai/litellm', '4000', 'LLM proxy and model router'],
|
| ])
|
|
|
| heading(doc, 'Scaling Strategy', level=2)
|
| body(doc, 'To scale from 1 PC to N PCs:')
|
| scaling_steps = [
|
| 'Install vLLM or Ollama on additional lab PCs with GPUs.',
|
| 'Load the desired models on each new node.',
|
| 'Register the new nodes in litellm/config.yaml with their IP addresses.',
|
| 'Restart the LiteLLM proxy (or hot-reload the config).',
|
| 'LiteLLM automatically load-balances requests across all registered nodes.',
|
| ]
|
| for step in scaling_steps:
|
| bullet(doc, step)
|
|
|
| body(doc, 'The FastAPI gateway, PostgreSQL database, Redis cache, and Nginx proxy remain on the primary node. '
|
| 'Only the GPU inference workers are distributed. This architecture supports 30+ worker nodes with a single '
|
| 'gateway instance.')
|
|
|
| heading(doc, 'Development Mode', level=2)
|
| body(doc, 'For development, the platform can run without Docker:')
|
| code_block(doc,
|
| '# Create virtual environment and install dependencies\n'
|
| 'python -m venv venv\n'
|
| '.\\venv\\Scripts\\Activate.ps1 # Windows\n'
|
| 'pip install -r requirements.txt\n\n'
|
| '# Run the server (SQLite, no PostgreSQL/Redis needed)\n'
|
| 'set PYTHONPATH=D:\\MBM\n'
|
| 'uvicorn mac.main:app --host 0.0.0.0 --port 8000 --reload'
|
| )
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '10. Testing Strategy', level=1)
|
|
|
| body(doc, 'The project includes a comprehensive automated test suite with 81+ test cases across all 11 modules. '
|
| 'Tests are written using pytest with the pytest-asyncio plugin for testing async FastAPI endpoints. An async '
|
| 'test client (httpx.AsyncClient) makes real HTTP requests against the in-memory ASGI app β no external server needed.')
|
|
|
| heading(doc, 'Test Infrastructure', level=2)
|
| test_infra = [
|
| ('Fixtures:', ' conftest.py provides auto-use database setup (create/drop tables per test), test_user, admin_user, and pre-authenticated header fixtures.'),
|
| ('Database isolation:', ' Each test runs against a fresh in-memory SQLite database to prevent cross-test contamination.'),
|
| ('No mocking:', ' Tests hit the actual FastAPI application through ASGI transport β the same code path as production.'),
|
| ]
|
| for bold_part, rest in test_infra:
|
| bullet(doc, rest, bold_prefix=bold_part)
|
|
|
| heading(doc, 'Test Coverage by Module', level=2)
|
| add_diagram(doc, dia_test_coverage())
|
| body(doc, 'Figure 11: Test case distribution across all 11 modules.', italic=True, size=10, justify=False)
|
|
|
| styled_table(doc, ['Test File', 'Module', 'Tests', 'What Is Tested'],
|
| [
|
| ['test_auth.py', 'Authentication', '15', 'Login, logout, refresh, DOB verify, admin CRUD, registry'],
|
| ['test_query.py', 'Query', '12', 'Chat, completions, embeddings, rerank, vision, streaming'],
|
| ['test_models.py', 'Models', '8', 'List, detail, health, load/unload, download'],
|
| ['test_explore.py', 'Explore', '6', 'Models, endpoints, health, search by tag'],
|
| ['test_usage.py', 'Usage', '8', 'My usage, history, quota, admin views'],
|
| ['test_keys.py', 'Keys', '6', 'Generate, view, revoke, admin list'],
|
| ['test_quota.py', 'Quota', '5', 'Limits, personal quota, admin overrides'],
|
| ['test_guardrails.py', 'Guardrails', '7', 'Input filtering, PII redaction, rule management'],
|
| ['test_rag.py', 'RAG', '8', 'Ingest, query, collections, document lifecycle'],
|
| ['test_search.py', 'Search', '5', 'Web search, Wikipedia, grounded search, cache'],
|
| ['test_integration.py', 'Integration', '6', 'Routing rules, workers, queue status'],
|
| ], col_widths=[1.5, 1.2, 0.5, 3.4])
|
|
|
| heading(doc, 'Running Tests', level=2)
|
| code_block(doc,
|
| '# Run all 81+ tests\n'
|
| 'set PYTHONPATH=D:\\MBM\n'
|
| 'pytest tests/ -v\n\n'
|
| '# Run a specific module\n'
|
| 'pytest tests/test_auth.py -v\n\n'
|
| '# Run with coverage report\n'
|
| 'pytest tests/ --cov=mac --cov-report=term-missing'
|
| )
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '11. API Endpoint Reference', level=1)
|
|
|
| body(doc, 'Complete listing of all REST API endpoints exposed by the MAC platform. Base URL: http://<server-ip>/api/v1')
|
|
|
| all_endpoints = [
|
|
|
| ('POST', '/auth/verify', 'No', 'Unified DOB-based authentication'),
|
| ('POST', '/auth/login', 'No', 'Password-based login (legacy/API)'),
|
| ('POST', '/auth/set-password', 'JWT', 'First-time password setup'),
|
| ('POST', '/auth/logout', 'JWT', 'Revoke session tokens'),
|
| ('POST', '/auth/refresh', 'No', 'Exchange refresh token'),
|
| ('GET', '/auth/me', 'JWT', 'User profile with quota info'),
|
| ('PUT', '/auth/me/profile', 'JWT', 'Update name/email'),
|
| ('POST', '/auth/change-password', 'JWT', 'Change password'),
|
| ('GET', '/auth/admin/users', 'Admin', 'List all users'),
|
| ('POST', '/auth/admin/users', 'Admin', 'Create user'),
|
| ('PUT', '/auth/admin/users/{id}/role', 'Admin', 'Change role'),
|
| ('PUT', '/auth/admin/users/{id}/status', 'Admin', 'Toggle active'),
|
| ('DELETE', '/auth/admin/users/{id}', 'Admin', 'Delete user'),
|
| ('POST', '/auth/admin/users/{id}/reset-password', 'Admin', 'Reset password'),
|
| ('POST', '/auth/admin/users/{id}/regenerate-key', 'Admin', 'Regenerate API key'),
|
| ('GET', '/auth/admin/registry', 'Admin', 'List registry'),
|
| ('POST', '/auth/admin/registry', 'Admin', 'Add registry entry'),
|
| ('POST', '/auth/admin/registry/bulk', 'Admin', 'Bulk import'),
|
| ('GET', '/auth/admin/stats', 'Admin', 'Dashboard stats'),
|
|
|
| ('GET', '/explore/models', 'No', 'List models'),
|
| ('GET', '/explore/models/search', 'No', 'Search by tag'),
|
| ('GET', '/explore/models/{id}', 'No', 'Model detail'),
|
| ('GET', '/explore/endpoints', 'No', 'List all endpoints'),
|
| ('GET', '/explore/health', 'No', 'Platform health'),
|
|
|
| ('POST', '/query/chat', 'JWT/Key', 'Chat completion'),
|
| ('POST', '/query/completions', 'JWT/Key', 'Text completion'),
|
| ('POST', '/query/embeddings', 'JWT/Key', 'Vector embeddings'),
|
| ('POST', '/query/rerank', 'JWT/Key', 'Re-rank passages'),
|
| ('POST', '/query/vision', 'JWT/Key', 'Image analysis'),
|
| ('POST', '/query/speech-to-text', 'JWT/Key', 'Speech to text'),
|
|
|
| ('GET', '/usage/me', 'JWT', 'My usage stats'),
|
| ('GET', '/usage/me/history', 'JWT', 'Request history'),
|
| ('GET', '/usage/me/quota', 'JWT', 'My quota status'),
|
| ('GET', '/usage/admin/all', 'Admin', 'All users usage'),
|
| ('GET', '/usage/admin/user/{roll}', 'Admin', 'User usage detail'),
|
| ('GET', '/usage/admin/models', 'Admin', 'Per-model stats'),
|
|
|
| ('GET', '/models/', 'No', 'List models'),
|
| ('GET', '/models/{id}', 'No', 'Model detail'),
|
| ('POST', '/models/{id}/load', 'Admin', 'Load model'),
|
| ('POST', '/models/{id}/unload', 'Admin', 'Unload model'),
|
| ('GET', '/models/{id}/health', 'No', 'Model health'),
|
| ('POST', '/models/download', 'Admin', 'Download model'),
|
| ('GET', '/models/download/{task}', 'No', 'Download progress'),
|
|
|
| ('GET', '/keys/my-key', 'JWT', 'View API key'),
|
| ('POST', '/keys/generate', 'JWT', 'Generate new key'),
|
| ('GET', '/keys/my-key/stats', 'JWT', 'Key usage stats'),
|
| ('DELETE', '/keys/my-key', 'JWT', 'Revoke key'),
|
| ('GET', '/keys/admin/all', 'Admin', 'All keys'),
|
| ('POST', '/keys/admin/revoke', 'Admin', 'Force revoke'),
|
|
|
| ('GET', '/quota/limits', 'No', 'Default limits'),
|
| ('GET', '/quota/me', 'JWT', 'My quota'),
|
| ('PUT', '/quota/admin/user/{roll}', 'Admin', 'Set override'),
|
| ('GET', '/quota/admin/exceeded', 'Admin', 'Exceeded users'),
|
|
|
| ('POST', '/guardrails/check-input', 'JWT', 'Check input'),
|
| ('POST', '/guardrails/check-output', 'JWT', 'Check output'),
|
| ('GET', '/guardrails/rules', 'Admin', 'List rules'),
|
| ('PUT', '/guardrails/rules', 'Admin', 'Update rules'),
|
|
|
| ('POST', '/rag/ingest', 'JWT', 'Upload document'),
|
| ('GET', '/rag/documents', 'JWT', 'List documents'),
|
| ('GET', '/rag/documents/{id}', 'JWT', 'Document detail'),
|
| ('DELETE', '/rag/documents/{id}', 'Admin', 'Delete document'),
|
| ('POST', '/rag/query', 'JWT', 'RAG-augmented Q&A'),
|
| ('GET', '/rag/query/{id}/sources', 'JWT', 'Query sources'),
|
| ('POST', '/rag/collections', 'Admin', 'Create collection'),
|
| ('GET', '/rag/collections', 'JWT', 'List collections'),
|
|
|
| ('POST', '/search/web', 'JWT', 'Web search'),
|
| ('POST', '/search/wikipedia', 'JWT', 'Wikipedia search'),
|
| ('POST', '/search/grounded', 'JWT', 'Grounded answer'),
|
| ('GET', '/search/cache', 'JWT', 'Search cache'),
|
|
|
| ('GET', '/integration/routing-rules', 'No', 'Routing config'),
|
| ('PUT', '/integration/routing-rules', 'Admin', 'Update routing'),
|
| ('GET', '/integration/workers', 'No', 'List workers'),
|
| ('GET', '/integration/workers/{id}', 'No', 'Worker detail'),
|
| ('POST', '/integration/workers/{id}/drain', 'Admin', 'Drain worker'),
|
| ('GET', '/integration/queue', 'No', 'Queue status'),
|
| ]
|
|
|
| styled_table(doc, ['Method', 'Path', 'Auth', 'Description'],
|
| all_endpoints, col_widths=[0.6, 2.8, 0.7, 2.5])
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '12. Project Structure', level=1)
|
|
|
| body(doc, 'The codebase follows a clean, modular layout. Each concern (routing, business logic, data models, schemas) '
|
| 'is separated into its own directory with one file per domain.')
|
|
|
| tree = (
|
| 'MAC/\n'
|
| 'βββ mac/ # Main application package\n'
|
| 'β βββ __init__.py\n'
|
| 'β βββ main.py # FastAPI entry point, lifespan, seed\n'
|
| 'β βββ config.py # Pydantic settings from .env\n'
|
| 'β βββ database.py # Async SQLAlchemy engine & sessions\n'
|
| 'β βββ routers/ # 11 API router modules\n'
|
| 'β β βββ auth.py # 19 endpoints\n'
|
| 'β β βββ explore.py # 5 endpoints\n'
|
| 'β β βββ query.py # 6 endpoints\n'
|
| 'β β βββ usage.py # 6 endpoints\n'
|
| 'β β βββ models.py # 7 endpoints\n'
|
| 'β β βββ keys.py # 6 endpoints\n'
|
| 'β β βββ quota.py # 4 endpoints\n'
|
| 'β β βββ guardrails.py # 4 endpoints\n'
|
| 'β β βββ rag.py # 8 endpoints\n'
|
| 'β β βββ search.py # 4 endpoints\n'
|
| 'β β βββ integration.py # 6 endpoints\n'
|
| 'β βββ services/ # 7 business logic modules\n'
|
| 'β β βββ auth_service.py # User auth, tokens, bcrypt\n'
|
| 'β β βββ llm_service.py # LLM proxy, smart routing\n'
|
| 'β β βββ model_service.py # Model lifecycle management\n'
|
| 'β β βββ guardrail_service.py # Content filtering logic\n'
|
| 'β β βββ rag_service.py # Chunking, embedding, Qdrant\n'
|
| 'β β βββ search_service.py # SearXNG, Wikipedia, grounded\n'
|
| 'β β βββ usage_service.py # Usage aggregation queries\n'
|
| 'β βββ models/ # SQLAlchemy ORM models\n'
|
| 'β β βββ user.py # User, StudentRegistry, RefreshToken, UsageLog\n'
|
| 'β β βββ guardrail.py # GuardrailRule\n'
|
| 'β β βββ quota.py # QuotaOverride\n'
|
| 'β β βββ rag.py # RAGCollection, RAGDocument\n'
|
| 'β βββ schemas/ # 11 Pydantic schema files\n'
|
| 'β βββ middleware/ # Auth + rate limiting\n'
|
| 'β βββ utils/ # Security helpers\n'
|
| 'βββ frontend/ # PWA frontend\n'
|
| 'β βββ index.html # HTML shell\n'
|
| 'β βββ app.js # ~750 lines SPA\n'
|
| 'β βββ style.css # Full theme\n'
|
| 'β βββ manifest.json # PWA manifest\n'
|
| 'β βββ sw.js # Service worker\n'
|
| 'βββ tests/ # 81+ test cases\n'
|
| 'β βββ conftest.py # Async fixtures\n'
|
| 'β βββ test_*.py # 11 test files\n'
|
| 'βββ docs/ # Documentation\n'
|
| 'β βββ Phase-0 through Phase-8 # Per-phase DOCX docs\n'
|
| 'β βββ MAC-API-Design-Document # Full API spec\n'
|
| 'βββ litellm/config.yaml # Model routing config\n'
|
| 'βββ nginx/nginx.conf # Reverse proxy config\n'
|
| 'βββ alembic/ # Database migrations\n'
|
| 'βββ docker-compose.yml # 7-service orchestration\n'
|
| 'βββ Dockerfile # Python 3.11 container\n'
|
| 'βββ requirements.txt # 14 Python dependencies\n'
|
| 'βββ pytest.ini # Test configuration'
|
| )
|
| code_block(doc, tree, size=7)
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '13. Build Roadmap & Phase Summary', level=1)
|
|
|
| body(doc, 'The project was built in 8 sequential phases, each delivering a working increment:')
|
|
|
| add_diagram(doc, dia_roadmap())
|
| body(doc, 'Figure 12: Eight-phase build roadmap showing dependencies.', italic=True, size=10, justify=False)
|
|
|
| styled_table(doc, ['Phase', 'Name', 'Deliverables', 'Depends On'],
|
| [
|
| ['1', 'API Endpoints', '35 core REST endpoints β auth, explore, query, usage', 'None'],
|
| ['2', 'LLM Models', '5 specialist models selected, VRAM planned, Ollama/vLLM configured', 'Phase 1'],
|
| ['3', 'APIβModel Integration', 'LiteLLM proxy wiring, smart routing, health monitoring', 'Phase 1, 2'],
|
| ['4', 'Usage Control', 'Rate limiting, token quotas, API key lifecycle, sliding-window algorithm', 'Phase 1'],
|
| ['5', 'Web Interface', 'PWA frontend β dashboard, chat, settings, admin panel', 'Phase 1, 4'],
|
| ['6', 'Guardrails', 'Input/output content filtering, PII redaction, custom rules', 'Phase 3'],
|
| ['7', 'Knowledgebase + RAG', 'Document ingestion, vector embedding, Qdrant storage, retrieval chain', 'Phase 3'],
|
| ['8', 'Retrieval + Search', 'SearXNG web search, Wikipedia, grounded answers with citations', 'Phase 3, 7'],
|
| ], col_widths=[0.5, 1.3, 3.2, 1.6])
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '14. Future Scope', level=1)
|
|
|
| body(doc, 'While the current platform is fully functional, several enhancements are planned for future development:')
|
|
|
| future_items = [
|
| ('Real-time GPU monitoring dashboard:', ' Integrate NVIDIA SMI data to show live GPU temperature, VRAM utilisation, and inference throughput on the admin panel.'),
|
| ('Model fine-tuning interface:', ' Allow faculty to fine-tune models on domain-specific datasets (e.g., college exam papers, lab manuals) through a web interface.'),
|
| ('Multi-modal support:', ' Expand beyond text and images to support video analysis and diagram generation.'),
|
| ('Conversation persistence:', ' Store chat sessions server-side to enable cross-device continuity and admin audit trails.'),
|
| ('Federation:', ' Connect multiple college instances to share GPU resources and model weights across institutions.'),
|
| ('Automated model selection:', ' Use ML-based classifiers instead of keyword matching for smarter auto-routing.'),
|
| ('Certificate-based auth:', ' Support client certificates for zero-trust network environments.'),
|
| ('Prometheus + Grafana monitoring:', ' Production-grade observability with metrics, alerts, and dashboards.'),
|
| ('WebSocket support:', ' Replace SSE with WebSocket for bidirectional real-time communication.'),
|
| ('Mobile-native app:', ' Dedicated iOS/Android apps with push notifications for admin alerts.'),
|
| ]
|
| for bold_part, rest in future_items:
|
| bullet(doc, rest, bold_prefix=bold_part)
|
|
|
| doc.add_page_break()
|
|
|
|
|
|
|
|
|
| heading(doc, '15. Conclusion', level=1)
|
|
|
| body(doc, 'MAC (MBM AI Cloud) demonstrates that a production-quality, self-hosted AI inference platform can be built '
|
| 'entirely with open-source technologies and existing college hardware. The platform provides 500+ students with '
|
| 'free, private, and high-performance access to state-of-the-art language models β for code generation, mathematical '
|
| 'reasoning, general text tasks, image understanding, and speech transcription.')
|
|
|
| body(doc, 'The architecture is deliberately simple and scalable. A modular FastAPI gateway handles authentication, '
|
| 'rate limiting, and content safety for 50+ API endpoints across 11 modules. The LLM inference is decoupled '
|
| 'through LiteLLM, allowing the system to scale from a single lab PC to 30+ GPU nodes by simply registering '
|
| 'new vLLM workers β zero code changes required.')
|
|
|
| body(doc, 'The unified DOB-based authentication flow eliminates the friction of traditional signup/login processes, '
|
| 'while the PWA frontend provides a ChatGPT-like experience that students can install on any device. Comprehensive '
|
| 'guardrails protect against prompt injection, harmful content, and PII exposure. The RAG pipeline enables '
|
| 'faculty to upload textbooks for AI-augmented learning, and the grounded search module provides cited, factual answers.')
|
|
|
| body(doc, 'With 81+ automated tests, Docker Compose deployment, and thorough documentation, the platform is ready '
|
| 'for production deployment at MBM Engineering College and serves as a replicable template for any educational '
|
| 'institution seeking to democratise AI access.')
|
|
|
|
|
| for _ in range(3):
|
| doc.add_paragraph()
|
| p = doc.add_paragraph()
|
| p.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
| r = p.add_run('β End of Report β')
|
| r.font.size = Pt(12)
|
| r.font.color.rgb = RGBColor(0x6c, 0x75, 0x7d)
|
| r.italic = True
|
|
|
|
|
| out_path = OUT / 'MAC-Final-Project-Report.docx'
|
| doc.save(str(out_path))
|
| size_kb = out_path.stat().st_size // 1024
|
| print(f'\n β Report generated: {out_path}')
|
| print(f' β Size: {size_kb} KB')
|
| print(f' β Sections: 15 chapters')
|
| print(f' β Diagrams: 12 professional figures')
|
| print(f' β Tables: 20+ styled tables')
|
|
|
|
|
| if __name__ == '__main__':
|
| generate_report()
|
|
|