Spaces:

openpecha
/

bec-dot.orc-api

Sleeping

ta4tsering committed on Feb 23

Commit

aeca817

1 Parent(s): 15c943a

refactor: update model loading logic to improve compatibility and adjust output textbox settings

Files changed (1) hide show

app.py CHANGED Viewed

@@ -59,24 +59,23 @@ def load_model():
     model_path = snapshot_download(
         repo_id=MODEL_ID,
         local_dir=MODEL_DIR,
-        local_dir_use_symlinks=False,
     )
     patch_configuration_dots(model_path)
     sys.path.insert(0, model_path)
-    # Try flash_attention_2 first, fall back to sdpa
     attn_impl = "flash_attention_2"
     try:
         import flash_attn  # noqa: F401
     except ImportError:
-        attn_impl = "sdpa"
     print(f"Loading model with attn_implementation={attn_impl} ...")
     model = AutoModelForCausalLM.from_pretrained(
         model_path,
         attn_implementation=attn_impl,
-        torch_dtype=torch.bfloat16,
         device_map="auto",
         trust_remote_code=True,
     )
@@ -195,7 +194,6 @@ print(result)
             output_text = gr.Textbox(
                 label="Model Output",
                 lines=20,
-                show_copy_button=True,
             )
     run_btn.click(

     model_path = snapshot_download(
         repo_id=MODEL_ID,
         local_dir=MODEL_DIR,
     )
     patch_configuration_dots(model_path)
     sys.path.insert(0, model_path)
+    # Try flash_attention_2 first, fall back to eager for compatibility.
     attn_impl = "flash_attention_2"
     try:
         import flash_attn  # noqa: F401
     except ImportError:
+        attn_impl = "eager"
     print(f"Loading model with attn_implementation={attn_impl} ...")
     model = AutoModelForCausalLM.from_pretrained(
         model_path,
         attn_implementation=attn_impl,
+        dtype=torch.bfloat16,
         device_map="auto",
         trust_remote_code=True,
     )
             output_text = gr.Textbox(
                 label="Model Output",
                 lines=20,
             )
     run_btn.click(