Spaces:
Running
on
Zero
Running
on
Zero
jedick
committed on
Commit
·
b4a8032
1
Parent(s):
c3715f2
Use transformers==4.54.1
Browse files
- main.py +4 -1
- mods/tool_calling_llm.py +3 -3
- requirements.txt +1 -1
main.py
CHANGED
|
@@ -156,9 +156,12 @@ def GetChatModel(compute_mode, ckpt_dir=None):
|
|
| 156 |
torch_dtype=torch.bfloat16,
|
| 157 |
# Enable FlashAttention (requires pip install flash-attn)
|
| 158 |
# https://huggingface.co/docs/transformers/en/attention_interface
|
| 159 |
-
# https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention
|
| 160 |
attn_implementation="flash_attention_2",
|
|
|
|
| 161 |
)
|
|
|
|
|
|
|
| 162 |
|
| 163 |
# Use MyTextGenerationPipeline with custom preprocess() method
|
| 164 |
pipe = MyTextGenerationPipeline(
|
|
|
|
| 156 |
torch_dtype=torch.bfloat16,
|
| 157 |
# Enable FlashAttention (requires pip install flash-attn)
|
| 158 |
# https://huggingface.co/docs/transformers/en/attention_interface
|
| 159 |
+
# https://huggingface.co/docs/transformers/perf_infer_gpu_one#flashattention
|
| 160 |
attn_implementation="flash_attention_2",
|
| 161 |
+
device_map="auto",
|
| 162 |
)
|
| 163 |
+
# For Flash Attention version of Qwen3
|
| 164 |
+
tokenizer.padding_side = "left"
|
| 165 |
|
| 166 |
# Use MyTextGenerationPipeline with custom preprocess() method
|
| 167 |
pipe = MyTextGenerationPipeline(
|
mods/tool_calling_llm.py
CHANGED
|
@@ -177,9 +177,9 @@ class ToolCallingLLM(BaseChatModel, ABC):
|
|
| 177 |
# Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
|
| 178 |
think_text, post_think = extract_think(response_message.content)
|
| 179 |
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
|
| 184 |
# Remove backticks around code blocks
|
| 185 |
post_think = re.sub(r"^```json", "", post_think)
|
|
|
|
| 177 |
# Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
|
| 178 |
think_text, post_think = extract_think(response_message.content)
|
| 179 |
|
| 180 |
+
# For debugging
|
| 181 |
+
print("post_think")
|
| 182 |
+
print(post_think)
|
| 183 |
|
| 184 |
# Remove backticks around code blocks
|
| 185 |
post_think = re.sub(r"^```json", "", post_think)
|
requirements.txt
CHANGED
|
@@ -13,7 +13,7 @@ flash-attn==2.8.2
|
|
| 13 |
# SmolLM3: transformers>=4.53
|
| 14 |
# NOTE: Gemma 3 with transformers==4.54.0 gives:
|
| 15 |
# ValueError: Max cache length is not consistent across layers
|
| 16 |
-
transformers==4.
|
| 17 |
tokenizers==0.21.2
|
| 18 |
# Required by langchain-huggingface
|
| 19 |
sentence-transformers==5.0.0
|
|
|
|
| 13 |
# SmolLM3: transformers>=4.53
|
| 14 |
# NOTE: Gemma 3 with transformers==4.54.0 gives:
|
| 15 |
# ValueError: Max cache length is not consistent across layers
|
| 16 |
+
transformers==4.54.1
|
| 17 |
tokenizers==0.21.2
|
| 18 |
# Required by langchain-huggingface
|
| 19 |
sentence-transformers==5.0.0
|