Spaces:

brendon-ai
/

faq-huggingface-model

Sleeping

brendon-ai committed on Jul 4, 2025

Commit

a31db3e

verified ·

1 Parent(s): 202a4ca

Update src/RAGSample.py

Files changed (1) hide show

src/RAGSample.py CHANGED Viewed

@@ -19,6 +19,8 @@ from typing import Optional, List
 import re
 import torch
 import subprocess
 # OPTION 1: Use Hugging Face Pipeline (Recommended for HF Spaces)
 from transformers import pipeline
@@ -367,28 +369,34 @@ Answer:
 """,
         input_variables=["question", "documents"],
     )
     # Initialize a local Hugging Face model
     hf_pipeline = pipeline(
-        # "text-generation",
-        # model="microsoft/BioGPT",
-        # tokenizer="microsoft/BioGPT",
-        # max_new_tokens=100,      # Reduced for stability
-        # max_length=1024,         # BioGPT's context length
-        # temperature=0.2,         # Lower for more focused responses
-        # device_map="auto",
-        # torch_dtype=torch.float16,
-        # return_full_text=False,
-        # truncation=True,
-        # do_sample=True,
-        # pad_token_id=1,
-        # eos_token_id=2,
         "text-generation",
-        model="microsoft/BioGPT",
-        tokenizer="microsoft/BioGPT",
-        max_new_tokens=50,  # Very small for testing
         device_map="auto",
-        torch_dtype=torch.float16
     )
     # Wrap it in LangChain

 import re
 import torch
 import subprocess
+# Load tokenizer and model separately to configure properly
+from transformers import AutoTokenizer, AutoModelForCausalLM
 # OPTION 1: Use Hugging Face Pipeline (Recommended for HF Spaces)
 from transformers import pipeline
 """,
         input_variables=["question", "documents"],
     )
+    tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT")
+    model = AutoModelForCausalLM.from_pretrained(
+        "microsoft/BioGPT",
+        device_map="auto",
+        torch_dtype=torch.float16
+    )
+    # Fix the tokenizer configuration
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
     # Initialize a local Hugging Face model
     hf_pipeline = pipeline(
         "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=100,      # Reduced for stability
+        max_length=1024,         # BioGPT's context length
+        temperature=0.2,         # Lower for more focused responses
         device_map="auto",
+        torch_dtype=torch.float16,
+        return_full_text=False,
+        truncation=True,
+        do_sample=True,
+        pad_token_id=1,
+        eos_token_id=2,
+        "text-generation"
     )
     # Wrap it in LangChain