Luigi committed on
Commit
c16840d
·
1 Parent(s): 01dc9b6

add -c option to force cpu only

Browse files
Files changed (1) hide show
  1. summarize_transcript.py +6 -5
summarize_transcript.py CHANGED
@@ -8,14 +8,14 @@ import argparse
8
  from llama_cpp import Llama
9
  from huggingface_hub import hf_hub_download
10
 
11
- def load_model(repo_id, filename):
12
  """Load the model from Hugging Face Hub."""
13
 
14
- # Initialize the model with SYCL support
15
  llm = Llama.from_pretrained(
16
  repo_id=repo_id,
17
  filename=filename,
18
- n_gpu_layers=-1, # Use all layers on GPU
19
  seed=1337,
20
  n_ctx=32768, # Context size
21
  verbose=True, # Reduced verbosity for cleaner output
@@ -88,6 +88,7 @@ def main():
88
  parser.add_argument("-m", "--model", type=str,
89
  default="bartowski/baidu_ERNIE-4.5-0.3B-PT-GGUF:Q6_K",
90
  help="HuggingFace model in format repo_id:quant (e.g., Luigi/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF:IQ4_NL)")
 
91
  args = parser.parse_args()
92
 
93
  # Parse model argument if provided
@@ -98,10 +99,10 @@ def main():
98
  print(f"Error: Invalid model format '{args.model}'. Expected format: repo_id:quant")
99
  return
100
 
101
- print(f"Loading model: {repo_id} ({filename}) with SYCL acceleration...")
102
 
103
  # Load the model
104
- llm = load_model(repo_id, filename)
105
 
106
  # Read the transcript
107
  transcript_path = args.input
 
8
  from llama_cpp import Llama
9
  from huggingface_hub import hf_hub_download
10
 
11
+ def load_model(repo_id, filename, cpu_only=False):
12
  """Load the model from Hugging Face Hub."""
13
 
14
+ # Initialize the model with SYCL support (or CPU only if requested)
15
  llm = Llama.from_pretrained(
16
  repo_id=repo_id,
17
  filename=filename,
18
+ n_gpu_layers=0 if cpu_only else -1, # 0 for CPU, -1 for all layers on GPU
19
  seed=1337,
20
  n_ctx=32768, # Context size
21
  verbose=True, # Reduced verbosity for cleaner output
 
88
  parser.add_argument("-m", "--model", type=str,
89
  default="bartowski/baidu_ERNIE-4.5-0.3B-PT-GGUF:Q6_K",
90
  help="HuggingFace model in format repo_id:quant (e.g., Luigi/Falcon-H1-Tiny-Multilingual-100M-Instruct-GGUF:IQ4_NL)")
91
+ parser.add_argument("-c", "--cpu", action="store_true", help="Force CPU only inference")
92
  args = parser.parse_args()
93
 
94
  # Parse model argument if provided
 
99
  print(f"Error: Invalid model format '{args.model}'. Expected format: repo_id:quant")
100
  return
101
 
102
+ print(f"Loading model: {repo_id} ({filename}) with {'CPU only' if args.cpu else 'SYCL acceleration'}...")
103
 
104
  # Load the model
105
+ llm = load_model(repo_id, filename, cpu_only=args.cpu)
106
 
107
  # Read the transcript
108
  transcript_path = args.input