File size: 2,845 Bytes
e0c2d04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# --- Site identity -----------------------------------------------------------
site_name: transformer-deploy by Lefebvre Dalloz
site_description: >-
  Efficient, scalable and enterprise-grade CPU/GPU inference server
  for Hugging Face transformer models
copyright: Copyright © 2020 - 2021 Lefebvre Dalloz

# --- Source repository -------------------------------------------------------
repo_url: https://github.com/ELS-RD/transformer-deploy/
repo_name: ELS-RD/transformer-deploy/
# Deliberately empty. NOTE(review): per the MkDocs docs an empty edit_uri
# disables the per-page "edit" link - confirm this is still the intent.
edit_uri: ""

# Look and feel: Material theme plus project-local template overrides.
theme:
  name: material
  # Directory holding the template overrides layered on top of the theme.
  custom_dir: docs/overrides
  icon:
    logo: material/speedometer
    repo: fontawesome/brands/github
  # Single, fixed color scheme.
  palette:
    scheme: default
    primary: black
    accent: deep orange
    # Disabled light/dark switch, kept for reference. The remnants below show
    # that it used a list of schemes under `palette`, each with a `toggle`:
    #     toggle:
    #       icon: material/weather-night
    #       name: Switch to light mode
    #   - scheme: default
    #     primary: black
    #     accent: deep orange
    #     toggle:
    #       icon: material/weather-sunny
    #       name: Switch to dark mode

# Python-Markdown / PyMdown extensions enabled for every page.
# Each extension is listed exactly once; entries with options use the
# "- name:" mapping form, entries without options are plain strings.
markdown_extensions:
  # Call-outs and collapsible blocks.
  - admonition
  - pymdownx.details
  - pymdownx.superfences
  # Emoji rendering. The !!python/name tags reference Python callables, so
  # this file can only be read by a loader that resolves such tags.
  # NOTE(review): recent Material for MkDocs releases document
  # `material.extensions.emoji.*` in place of `materialx.emoji.*` - confirm
  # against the pinned theme version before changing.
  - pymdownx.emoji:
      emoji_index: !!python/name:materialx.emoji.twemoji
      emoji_generator: !!python/name:materialx.emoji.to_svg
  # Code highlighting and snippet inclusion.
  - pymdownx.highlight:
      anchor_linenums: true
  - pymdownx.inlinehilite
  - pymdownx.snippets
  # Table of contents; "#" is the text used for heading permalinks.
  - toc:
      permalink: "#"
  # Inline text markup helpers.
  - pymdownx.critic
  - pymdownx.caret
  - pymdownx.mark
  - pymdownx.tilde
  # Abbreviations, attribute lists and Markdown inside HTML blocks.
  - abbr
  - attr_list
  - md_in_html

# MkDocs plugins, in their original order (kept as-is on purpose).
plugins:
  - search
  # API documentation rendered from Python docstrings.
  - mkdocstrings:
      handlers:
        python:
          selection:
            docstring_style: restructured-text
  - include-markdown
  # Jupyter notebook pages (the nav references several .ipynb files).
  - mkdocs-jupyter:
      theme: dark
      ignore_h1_titles: true
  # Runs the listed script(s) to generate documentation files.
  - gen-files:
      scripts:
        - resources/gen_doc_stubs.py
  # Navigation fragments are read from files named SUMMARY.md.
  - literate-nav:
      nav_file: SUMMARY.md

# Extra values handed to the theme.
extra:
  # NOTE(review): presumably hides the theme's generator notice in the
  # footer - confirm against the Material for MkDocs docs.
  generator: false
  # Social profile links, each with the icon used to render it.
  social:
    - icon: fontawesome/brands/twitter
      link: https://twitter.com/pommedeterre33
    - icon: fontawesome/brands/medium
      link: https://medium.com/@pommedeterre33

# Site navigation. Each entry is "<title>: <target>"; a title mapped to a
# list forms a nested section.
nav:
  - Getting started: index.md
  - Installation (local or Docker only): setup_local.md
  - Run (1 command): run.md
  - Which tool to choose for your inference?: compare.md
  - How ONNX conversion works?: onnx_convert.md
  - Understanding model optimization: optimizations.md
  - Direct use TensorRT in Python script (no server): python.md
  - GPU quantization for X2 speed-up:
      - Why using quantization?: quantization/quantization_intro.md
      - Quantization theory: quantization/quantization_theory.md
      - How is it implemented in this library?: quantization/quantization_ast.md
      - PTQ and QAT, what are they?: quantization/quantization_ptq.md
      - End to end demo: quantization/quantization.ipynb
  # Quoted because the title itself contains ": ".
  - "From optimization to deployment: end to end demo": demo.md
  - Accelerate text generation with GPT-2: gpt2.ipynb
  - Accelerate text generation with T5: t5.ipynb
  - Benchmarks run on AWS GPU instances: benchmarks.md
  - FAQ: faq.md
  # Directory target (trailing slash). NOTE(review): presumably expanded by
  # the literate-nav plugin from a SUMMARY.md in that directory - confirm.
  - API: reference/