Spaces:
Sleeping
Sleeping
[add] gpu info
Browse files
- app/models.py +1 -0
- app/tables.py +25 -3
- main.py +10 -5
app/models.py
CHANGED
|
@@ -74,6 +74,7 @@ class Ram(BaseModel):
|
|
| 74 |
class Item(BaseModel):
|
| 75 |
offload_layers: int = Field(alias="offloadLayers")
|
| 76 |
full_offloaded: bool = Field(alias="fullOffloaded")
|
|
|
|
| 77 |
ram: "Ram"
|
| 78 |
vrams: list["Ram"]
|
| 79 |
|
|
|
|
| 74 |
class Item(BaseModel):
|
| 75 |
offload_layers: int = Field(alias="offloadLayers")
|
| 76 |
full_offloaded: bool = Field(alias="fullOffloaded")
|
| 77 |
+
maximum_tokens_per_second: float = Field(None, alias="maximumTokensPerSecond")
|
| 78 |
ram: "Ram"
|
| 79 |
vrams: list["Ram"]
|
| 80 |
|
app/tables.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
|
|
|
|
| 3 |
from app.models import Architecture, Estimate, Metadata, Tokenizer
|
| 4 |
from app.utils import abbreviate_number, human_readable_size
|
| 5 |
|
|
@@ -26,13 +27,34 @@ def get_model_info_df(
|
|
| 26 |
|
| 27 |
|
| 28 |
def get_estimate_df(estimate: Estimate):
|
|
|
|
| 29 |
return pd.DataFrame(
|
| 30 |
[
|
| 31 |
{
|
|
|
|
|
|
|
|
|
|
| 32 |
"Context Size": estimate.context_size,
|
| 33 |
-
"
|
| 34 |
-
"
|
| 35 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
}
|
|
|
|
| 37 |
]
|
| 38 |
)
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
|
| 3 |
+
from app.devices import Device
|
| 4 |
from app.models import Architecture, Estimate, Metadata, Tokenizer
|
| 5 |
from app.utils import abbreviate_number, human_readable_size
|
| 6 |
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
def get_estimate_df(estimate: Estimate):
|
| 30 |
+
|
| 31 |
return pd.DataFrame(
|
| 32 |
[
|
| 33 |
{
|
| 34 |
+
"Max Token per Sec.": round(
|
| 35 |
+
estimate.items[0].maximum_tokens_per_second, 2
|
| 36 |
+
),
|
| 37 |
"Context Size": estimate.context_size,
|
| 38 |
+
"Offload Layers": estimate.items[0].offload_layers,
|
| 39 |
+
"Full Offloaded": estimate.items[0].full_offloaded,
|
| 40 |
+
"CPU Handle Layers": estimate.items[0].ram.handle_layers,
|
| 41 |
+
"CPU UMA": human_readable_size(estimate.items[0].ram.uma),
|
| 42 |
+
"CPU NONUMA": human_readable_size(estimate.items[0].ram.nonuma),
|
| 43 |
+
}
|
| 44 |
+
]
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def get_gpus_df(estimate: Estimate, gpu_name: str, selected_device: Device):
|
| 49 |
+
return pd.DataFrame(
|
| 50 |
+
[
|
| 51 |
+
{
|
| 52 |
+
"GPU": gpu_name,
|
| 53 |
+
"GPU Memory Size": selected_device.memory_size,
|
| 54 |
+
"Handle Layers": gpu.handle_layers,
|
| 55 |
+
"UMA": human_readable_size(gpu.uma),
|
| 56 |
+
"NONUMA": human_readable_size(gpu.nonuma),
|
| 57 |
}
|
| 58 |
+
for gpu in estimate.items[0].vrams
|
| 59 |
]
|
| 60 |
)
|
main.py
CHANGED
|
@@ -7,7 +7,7 @@ import pandas as pd
|
|
| 7 |
|
| 8 |
from app.devices import Device
|
| 9 |
from app.models import GgufParser
|
| 10 |
-
from app.tables import get_estimate_df, get_model_info_df
|
| 11 |
|
| 12 |
GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
|
| 13 |
gguf_parser = Path("gguf-parser-linux-amd64")
|
|
@@ -27,8 +27,8 @@ device_options = [
|
|
| 27 |
def process_url(url, context_length, device_selection):
|
| 28 |
try:
|
| 29 |
# 取得選擇的裝置鍵值
|
| 30 |
-
|
| 31 |
-
selected_device = devices[
|
| 32 |
res = os.popen(
|
| 33 |
f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
|
| 34 |
).read()
|
|
@@ -40,7 +40,9 @@ def process_url(url, context_length, device_selection):
|
|
| 40 |
|
| 41 |
estimate_df = get_estimate_df(parser_result.estimate)
|
| 42 |
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
except Exception as e:
|
| 45 |
return e
|
| 46 |
|
|
@@ -50,7 +52,9 @@ if __name__ == "__main__":
|
|
| 50 |
os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
|
| 51 |
|
| 52 |
with gr.Blocks(title="GGUF Parser") as iface:
|
| 53 |
-
url_input = gr.Textbox(
|
|
|
|
|
|
|
| 54 |
context_length = gr.Number(label="Context Length", value=8192)
|
| 55 |
device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
|
| 56 |
submit_btn = gr.Button("Send")
|
|
@@ -61,6 +65,7 @@ if __name__ == "__main__":
|
|
| 61 |
outputs=[
|
| 62 |
gr.DataFrame(label="Model Info"),
|
| 63 |
gr.DataFrame(label="ESTIMATE"),
|
|
|
|
| 64 |
],
|
| 65 |
)
|
| 66 |
iface.launch()
|
|
|
|
| 7 |
|
| 8 |
from app.devices import Device
|
| 9 |
from app.models import GgufParser
|
| 10 |
+
from app.tables import get_estimate_df, get_gpus_df, get_model_info_df
|
| 11 |
|
| 12 |
GGUF_PARSER_VERSION = os.getenv("GGUF_PARSER_VERSION", "v0.12.0")
|
| 13 |
gguf_parser = Path("gguf-parser-linux-amd64")
|
|
|
|
| 27 |
def process_url(url, context_length, device_selection):
|
| 28 |
try:
|
| 29 |
# 取得選擇的裝置鍵值
|
| 30 |
+
device_name = device_selection.split(" ")[0]
|
| 31 |
+
selected_device = devices[device_name]
|
| 32 |
res = os.popen(
|
| 33 |
f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
|
| 34 |
).read()
|
|
|
|
| 40 |
|
| 41 |
estimate_df = get_estimate_df(parser_result.estimate)
|
| 42 |
|
| 43 |
+
gpus_info_df = get_gpus_df(parser_result.estimate, device_name, selected_device)
|
| 44 |
+
|
| 45 |
+
return model_info, estimate_df, gpus_info_df
|
| 46 |
except Exception as e:
|
| 47 |
return e
|
| 48 |
|
|
|
|
| 52 |
os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
|
| 53 |
|
| 54 |
with gr.Blocks(title="GGUF Parser") as iface:
|
| 55 |
+
url_input = gr.Textbox(
|
| 56 |
+
label="GGUF File URL", placeholder="Enter GGUF URL", value=DEFAULT_URL
|
| 57 |
+
)
|
| 58 |
context_length = gr.Number(label="Context Length", value=8192)
|
| 59 |
device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
|
| 60 |
submit_btn = gr.Button("Send")
|
|
|
|
| 65 |
outputs=[
|
| 66 |
gr.DataFrame(label="Model Info"),
|
| 67 |
gr.DataFrame(label="ESTIMATE"),
|
| 68 |
+
gr.DataFrame(label="GPUs INFO"),
|
| 69 |
],
|
| 70 |
)
|
| 71 |
iface.launch()
|