Spaces:
Sleeping
Sleeping
[add] gpu info
Browse files
- app/models.py +1 -0
- app/tables.py +25 -3
- main.py +10 -5
app/models.py
CHANGED
|
@@ -74,6 +74,7 @@ class Ram(BaseModel):
|
|
| 74 |
class Item(BaseModel):
|
| 75 |
offload_layers: int = Field(alias="offloadLayers")
|
| 76 |
full_offloaded: bool = Field(alias="fullOffloaded")
|
|
|
|
| 77 |
ram: "Ram"
|
| 78 |
vrams: list["Ram"]
|
| 79 |
|
|
|
|
| 74 |
class Item(BaseModel):
|
| 75 |
offload_layers: int = Field(alias="offloadLayers")
|
| 76 |
full_offloaded: bool = Field(alias="fullOffloaded")
|
| 77 |
+
maximum_tokens_per_second: float = Field(None, alias="maximumTokensPerSecond")
|
| 78 |
ram: "Ram"
|
| 79 |
vrams: list["Ram"]
|
| 80 |
|
app/tables.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
|
|
|
|
| 3 |
from app.models import Architecture, Estimate, Metadata, Tokenizer
|
| 4 |
from app.utils import abbreviate_number, human_readable_size
|
| 5 |
|
|
@@ -26,13 +27,34 @@ def get_model_info_df(
|
|
| 26 |
|
| 27 |
|
| 28 |
def get_estimate_df(estimate: Estimate):
|
|
|
|
| 29 |
return pd.DataFrame(
|
| 30 |
[
|
| 31 |
{
|
|
|
|
|
|
|
|
|
|
| 32 |
"Context Size": estimate.context_size,
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
}
|
|
|
|
| 37 |
]
|
| 38 |
)
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
+
from app.devices import Device
|
| 4 |
from app.models import Architecture, Estimate, Metadata, Tokenizer
|
| 5 |
from app.utils import abbreviate_number, human_readable_size
|
| 6 |
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def get_estimate_df(estimate: Estimate):
|
| 30 |
+
|
| 31 |
return pd.DataFrame(
|
| 32 |
[
|
| 33 |
{
|
| 34 |
+
"Max Token per Sec.": round(
|
| 35 |
+
estimate.items[0].maximum_tokens_per_second, 2
|
| 36 |
+
),
|
| 37 |
"Context Size": estimate.context_size,
|
| 38 |
+
"Offload Layers": estimate.items[0].offload_layers,
|
| 39 |
+
"Full Offloaded": estimate.items[0].full_offloaded,
|
| 40 |
+
"CPU Handle Layers": estimate.items[0].ram.handle_layers,
|
| 41 |
+
"CPU UMA": human_readable_size(estimate.items[0].ram.uma),
|
| 42 |
+
"CPU NONUMA": human_readable_size(estimate.items[0].ram.nonuma),
|
| 43 |
+
}
|
| 44 |
+
]
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def get_gpus_df(estimate: Estimate, gpu_name: str, selected_device: Device):
|
| 49 |
+
return pd.DataFrame(
|
| 50 |
+
[
|
| 51 |
+
{
|
| 52 |
+
"GPU": gpu_name,
|
| 53 |
+
"GPU Memory Size": selected_device.memory_size,
|
| 54 |
+
"Handle Layers": gpu.handle_layers,
|
| 55 |
+
"UMA": human_readable_size(gpu.uma),
|
| 56 |
+
"NONUMA": human_readable_size(gpu.nonuma),
|
| 57 |
}
|
| 58 |
+
for gpu in estimate.items[0].vrams
|
| 59 |
]
|
| 60 |
)
|
main.py
CHANGED
|
@@ -7,7 +7,7 @@ import pandas as pd
|
|
| 7 |
|
| 8 |
from app.devices import Device
|
| 9 |
from app.models import GgufParser
|
| 10 |
-
from app.tables import get_estimate_df, get_model_info_df
|
| 11 |
|
| 12 |
GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
|
| 13 |
gguf_parser = Path("gguf-parser-linux-amd64")
|
|
@@ -27,8 +27,8 @@ device_options = [
|
|
| 27 |
def process_url(url, context_length, device_selection):
|
| 28 |
try:
|
| 29 |
# 取得選擇的裝置鍵值
|
| 30 |
-
|
| 31 |
-
selected_device = devices[
|
| 32 |
res = os.popen(
|
| 33 |
f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
|
| 34 |
).read()
|
|
@@ -40,7 +40,9 @@ def process_url(url, context_length, device_selection):
|
|
| 40 |
|
| 41 |
estimate_df = get_estimate_df(parser_result.estimate)
|
| 42 |
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
except Exception as e:
|
| 45 |
return e
|
| 46 |
|
|
@@ -50,7 +52,9 @@ if __name__ == "__main__":
|
|
| 50 |
os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
|
| 51 |
|
| 52 |
with gr.Blocks(title="GGUF Parser") as iface:
|
| 53 |
-
url_input = gr.Textbox(
|
|
|
|
|
|
|
| 54 |
context_length = gr.Number(label="Context Length", value=8192)
|
| 55 |
device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
|
| 56 |
submit_btn = gr.Button("Send")
|
|
@@ -61,6 +65,7 @@ if __name__ == "__main__":
|
|
| 61 |
outputs=[
|
| 62 |
gr.DataFrame(label="Model Info"),
|
| 63 |
gr.DataFrame(label="ESTIMATE"),
|
|
|
|
| 64 |
],
|
| 65 |
)
|
| 66 |
iface.launch()
|
|
|
|
| 7 |
|
| 8 |
from app.devices import Device
|
| 9 |
from app.models import GgufParser
|
| 10 |
+
from app.tables import get_estimate_df, get_gpus_df, get_model_info_df
|
| 11 |
|
| 12 |
GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
|
| 13 |
gguf_parser = Path("gguf-parser-linux-amd64")
|
|
|
|
| 27 |
def process_url(url, context_length, device_selection):
|
| 28 |
try:
|
| 29 |
# 取得選擇的裝置鍵值
|
| 30 |
+
device_name = device_selection.split(" ")[0]
|
| 31 |
+
selected_device = devices[device_name]
|
| 32 |
res = os.popen(
|
| 33 |
f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
|
| 34 |
).read()
|
|
|
|
| 40 |
|
| 41 |
estimate_df = get_estimate_df(parser_result.estimate)
|
| 42 |
|
| 43 |
+
gpus_info_df = get_gpus_df(parser_result.estimate, device_name, selected_device)
|
| 44 |
+
|
| 45 |
+
return model_info, estimate_df, gpus_info_df
|
| 46 |
except Exception as e:
|
| 47 |
return e
|
| 48 |
|
|
|
|
| 52 |
os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
|
| 53 |
|
| 54 |
with gr.Blocks(title="GGUF Parser") as iface:
|
| 55 |
+
url_input = gr.Textbox(
|
| 56 |
+
label="GGUF File URL", placeholder="Enter GGUF URL", value=DEFAULT_URL
|
| 57 |
+
)
|
| 58 |
context_length = gr.Number(label="Context Length", value=8192)
|
| 59 |
device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
|
| 60 |
submit_btn = gr.Button("Send")
|
|
|
|
| 65 |
outputs=[
|
| 66 |
gr.DataFrame(label="Model Info"),
|
| 67 |
gr.DataFrame(label="ESTIMATE"),
|
| 68 |
+
gr.DataFrame(label="GPUs INFO"),
|
| 69 |
],
|
| 70 |
)
|
| 71 |
iface.launch()
|