Spaces:

phate334
/

gguf-parser-web

Running

App Files Community

phate334 committed on Nov 3, 2024

Commit

863856d

1 Parent(s): 6160b72

[add] device model

Browse files

Files changed (4) hide show

.vscode/settings.json +1 -1
app/devices.py +7 -0
devices.json +15 -15
main.py +17 -6

.vscode/settings.json CHANGED Viewed

@@ -3,7 +3,7 @@
         "editor.defaultFormatter": "ms-python.black-formatter",
         "editor.formatOnSave": true,
         "editor.codeActionsOnSave": {
-            "source.organizeImports": true
         },
       },
     "isort.args":["--profile", "black"],

         "editor.defaultFormatter": "ms-python.black-formatter",
         "editor.formatOnSave": true,
         "editor.codeActionsOnSave": {
+            "source.organizeImports": "explicit"
         },
       },
     "isort.args":["--profile", "black"],

app/devices.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from pydantic import BaseModel, Field
+class Device(BaseModel):
+    memory_size: int = Field(alias="memorySize")
+    memory_bandwidth: float = Field(alias="memoryBandwidth")
+    FLOPS: str

devices.json CHANGED Viewed

@@ -1,15 +1,15 @@
-[
-    {"model": "V100", "memory-size": 32, "memory-bandwidth": 900, "FLOPS": "112.224TFLOPS"},
-    {"model": "T4", "memory-size": 16, "memory-bandwidth": 320, "FLOPS": "64.8TFLOPS"},
-    {"model": "A2", "memory-size": 16, "memory-bandwidth": 200, "FLOPS": "18.124TFLOPS"},
-    {"model": "A10", "memory-size": 24, "memory-bandwidth": 600, "FLOPS": "124.96TFLOPS"},
-    {"model": "A16*4", "memory-size": 64, "memory-bandwidth": 800, "FLOPS": "73.728TFLOPS"},
-    {"model": "A30", "memory-size": 24, "memory-bandwidth": 933.1, "FLOPS": "165.12TFLOPS"},
-    {"model": "A40", "memory-size": 48, "memory-bandwidth": 695.8, "FLOPS": "149.68TFLOPS"},
-    {"model": "A100-40GB", "memory-size": 40, "memory-bandwidth": 1555, "FLOPS": "312.0TFLOPS"},
-    {"model": "A100-80GB", "memory-size": 80, "memory-bandwidth": 1555, "FLOPS": "312.0TFLOPS"},
-    {"model": "H100-PCIE", "memory-size": 80, "memory-bandwidth": 2039, "FLOPS": "756.449TFLOPS"},
-    {"model": "H100-SXM", "memory-size": 80, "memory-bandwidth": 3352, "FLOPS": "989.43TFLOPS"},
-    {"model": "L40", "memory-size": 48, "memory-bandwidth": 864, "FLOPS": "362.066TFLOPS"},
-    {"model": "L4", "memory-size": 24, "memory-bandwidth": 300, "FLOPS": "121.0TFLOPS"}
-]

+{
+    "V100": {"memorySize": 32, "memoryBandwidth": 900, "FLOPS": "112.224TFLOPS"},
+    "T4": {"memorySize": 16, "memoryBandwidth": 320, "FLOPS": "64.8TFLOPS"},
+    "A2": {"memorySize": 16, "memoryBandwidth": 200, "FLOPS": "18.124TFLOPS"},
+    "A10": {"memorySize": 24, "memoryBandwidth": 600, "FLOPS": "124.96TFLOPS"},
+    "A16*4": {"memorySize": 64, "memoryBandwidth": 800, "FLOPS": "73.728TFLOPS"},
+    "A30": {"memorySize": 24, "memoryBandwidth": 933.1, "FLOPS": "165.12TFLOPS"},
+    "A40": {"memorySize": 48, "memoryBandwidth": 695.8, "FLOPS": "149.68TFLOPS"},
+    "A100-40GB": {"memorySize": 40, "memoryBandwidth": 1555, "FLOPS": "312.0TFLOPS"},
+    "A100-80GB": {"memorySize": 80, "memoryBandwidth": 1555, "FLOPS": "312.0TFLOPS"},
+    "H100-PCIE": {"memorySize": 80, "memoryBandwidth": 2039, "FLOPS": "756.449TFLOPS"},
+    "H100-SXM": {"memorySize": 80, "memoryBandwidth": 3352, "FLOPS": "989.43TFLOPS"},
+    "L40": {"memorySize": 48, "memoryBandwidth": 864, "FLOPS": "362.066TFLOPS"},
+    "L4": {"memorySize": 24, "memoryBandwidth": 300, "FLOPS": "121.0TFLOPS"}
+}

main.py CHANGED Viewed

@@ -5,6 +5,7 @@ from pathlib import Path
 import gradio as gr
 import pandas as pd
 from app.models import GgufParser
 from app.tables import get_estimate_df, get_model_info_df
@@ -13,11 +14,23 @@ gguf_parser = Path("gguf-parser-linux-amd64")
 gguf_parser_url = f"https://github.com/gpustack/gguf-parser-go/releases/download/{GGUF_PARSER_VERSION}/{gguf_parser}"
 DEFAULT_URL = "https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf"
-def process_url(url, context_length):
     try:
         res = os.popen(
-            f"./{gguf_parser} --ctx-size={context_length} -url {url} --json"
         ).read()
         parser_result = GgufParser.model_validate_json(res)
@@ -36,17 +49,15 @@ if __name__ == "__main__":
     if not gguf_parser.exists():
         os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
-    with open("devices.json", "r", encoding="utf-8") as f:
-        device_list = json.load(f)
     with gr.Blocks(title="GGUF Parser") as iface:
         url_input = gr.Textbox(placeholder="Enter GGUF URL", value=DEFAULT_URL)
         context_length = gr.Number(label="Context Length", value=8192)
         submit_btn = gr.Button("Send")
         submit_btn.click(
             fn=process_url,
-            inputs=[url_input, context_length],
             outputs=[
                 gr.DataFrame(label="Model Info"),
                 gr.DataFrame(label="ESTIMATE"),

 import gradio as gr
 import pandas as pd
+from app.devices import Device
 from app.models import GgufParser
 from app.tables import get_estimate_df, get_model_info_df
 gguf_parser_url = f"https://github.com/gpustack/gguf-parser-go/releases/download/{GGUF_PARSER_VERSION}/{gguf_parser}"
 DEFAULT_URL = "https://huggingface.co/phate334/Llama-3.1-8B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.1-8b-instruct-q4_k_m.gguf"
+with open("devices.json", "r", encoding="utf-8") as f:
+    data = json.load(f)
+    devices = {key: Device(**value) for key, value in data.items()}
+device_options = [
+    f"{key} (Memory: {value.memory_size}GB, Bandwidth: {value.memory_bandwidth}GB/s)"
+    for key, value in devices.items()
+]
+def process_url(url, context_length, device_selection):
     try:
+        # Extract the device key (the text before the first space) from the selected dropdown label
+        device_key = device_selection.split(" ")[0]
+        selected_device = devices[device_key]
         res = os.popen(
+            f'./{gguf_parser} --ctx-size={context_length} -url {url} --device-metric "{selected_device.FLOPS};{selected_device.memory_bandwidth}GBps" --json'
         ).read()
         parser_result = GgufParser.model_validate_json(res)
     if not gguf_parser.exists():
         os.system(f"wget {gguf_parser_url}&&chmod +x {gguf_parser}")
     with gr.Blocks(title="GGUF Parser") as iface:
         url_input = gr.Textbox(placeholder="Enter GGUF URL", value=DEFAULT_URL)
         context_length = gr.Number(label="Context Length", value=8192)
+        device_dropdown = gr.Dropdown(label="Select Device", choices=device_options)
         submit_btn = gr.Button("Send")
         submit_btn.click(
             fn=process_url,
+            inputs=[url_input, context_length, device_dropdown],
             outputs=[
                 gr.DataFrame(label="Model Info"),
                 gr.DataFrame(label="ESTIMATE"),