Spaces:

logasanjeev
/

DocTalk

Sleeping

logasanjeev committed on Apr 20, 2025

Commit

8b40c0d

verified ·

1 Parent(s): 34f6f25

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,7 +16,6 @@ import chromadb
 import tempfile
 from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
 import requests
-from transformers import BitsAndBytesConfig
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -161,16 +160,13 @@ def initialize_qa_chain(llm_model, temperature):
         return "Please process documents first.", None
     try:
-        # Enable 4-bit quantization for all models to reduce memory usage
-        quantization_config = BitsAndBytesConfig(load_in_4bit=True)
         llm = HuggingFaceEndpoint(
             repo_id=LLM_MODELS[llm_model],
             task="text-generation",
             temperature=float(temperature),
             max_new_tokens=512,
             huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
-            timeout=30,
-            model_kwargs={"quantization_config": quantization_config}
         )
         # Dynamically set k based on vector store size
         collection = vector_store._collection

 import tempfile
 from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
 import requests
 # Set up logging
 logging.basicConfig(level=logging.INFO)
         return "Please process documents first.", None
     try:
         llm = HuggingFaceEndpoint(
             repo_id=LLM_MODELS[llm_model],
             task="text-generation",
             temperature=float(temperature),
             max_new_tokens=512,
             huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
+            timeout=30
         )
         # Dynamically set k based on vector store size
         collection = vector_store._collection