Chamin09 committed on
Commit
92b64f3
·
verified ·
1 Parent(s): 17f9952

Update models/llm_setup.py

Browse files
Files changed (1) hide show
  1. models/llm_setup.py +55 -61
models/llm_setup.py CHANGED
@@ -1,68 +1,62 @@
1
  import torch
2
- #from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
3
- from llama_index.llms.huggingface import HuggingFaceLLM
4
- #from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
5
- #from llama_index.llms.huggingface import HuggingFaceLLM
6
 
7
- def setup_llm(model_name: str = "microsoft/phi-3-mini-4k-instruct",
8
- context_window: int = 4096,
9
- max_new_tokens: int = 512):
10
- """Set up the language model for the CSV chatbot."""
11
 
12
- try:
13
- # Initialize LLM with the correct import
14
- llm = HuggingFaceInferenceAPI(
15
- model_name=model_name,
16
- tokenizer_name=model_name,
17
- context_window=context_window,
18
- max_new_tokens=max_new_tokens,
19
- generate_kwargs={"temperature": 0.7, "top_p": 0.95}
20
- )
21
- return llm
22
-
23
- except Exception as e:
24
- print(f"Error initializing HuggingFaceInferenceAPI: {e}")
25
-
26
- # Fallback to a simpler approach if needed
27
- from transformers import pipeline
28
-
29
  try:
30
- # Use a smaller model as fallback
31
- pipe = pipeline(
32
  "text-generation",
33
- model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
34
- torch_dtype="auto",
35
- device_map="auto",
 
36
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
- # Create a simple wrapper to match LlamaIndex's expected interface
39
- class SimpleLLM:
40
- def complete(self, prompt):
41
- class Response:
42
- def __init__(self, text):
43
- self.text = text
44
-
45
- result = pipe(
46
- prompt,
47
- max_new_tokens=max_new_tokens,
48
- temperature=0.7,
49
- do_sample=True
50
- )
51
- generated_text = result[0]["generated_text"][len(prompt):]
52
- return Response(generated_text)
53
-
54
- return SimpleLLM()
55
-
56
- except Exception as e2:
57
- print(f"Fallback initialization also failed: {e2}")
58
-
59
- # Last resort - dummy LLM
60
- class DummyLLM:
61
- def complete(self, prompt):
62
- class Response:
63
- def __init__(self, text):
64
- self.text = text
65
-
66
- return Response("Model initialization failed. Please check logs.")
67
-
68
- return DummyLLM()
 
1
  import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 
 
3
 
4
class SimpleTransformersLLM:
    """A simple wrapper for Hugging Face Transformers models.

    Exposes a LlamaIndex-style ``complete(prompt)`` method that returns an
    object with a ``.text`` attribute, so it can stand in for a real LLM
    backend without requiring any API keys.
    """

    def __init__(self, model_name="google/flan-t5-small"):
        """Initialize with a small model that works on CPU.

        Args:
            model_name: Hugging Face model id. The default, flan-t5-small,
                is a T5 encoder-decoder model.
        """
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
            # Bug fix: flan-t5 is a seq2seq (encoder-decoder) model. The
            # original "text-generation" task only supports causal LMs and
            # raised at pipeline construction, so self.pipe was always None
            # and the wrapper could never generate anything.
            self.pipe = pipeline(
                "text2text-generation",
                model=model_name,
                tokenizer=self.tokenizer,
                max_length=512,
                device_map="auto",
            )
        except Exception as e:
            # Degrade gracefully: complete() will report the failure
            # instead of raising at call time.
            print(f"Error initializing model: {e}")
            self.pipe = None

    def complete(self, prompt):
        """Complete a prompt with the model.

        Returns an object with a ``.text`` attribute containing either the
        generated text or a human-readable error message; never raises.
        """
        class Response:
            def __init__(self, text):
                self.text = text

        if self.pipe is None:
            return Response("Model initialization failed.")

        try:
            # NOTE: len(prompt) counts characters, not tokens — it's a rough
            # upper bound for the output length, kept from the original.
            result = self.pipe(prompt, max_length=len(prompt) + 200, do_sample=True)
            # text2text-generation returns only the newly generated text, so
            # there is no prompt prefix to strip (the original sliced off
            # len(prompt) characters, which would have discarded most of a
            # seq2seq response).
            response_text = result[0]["generated_text"].strip()
            if not response_text:
                response_text = "I couldn't generate a proper response."

            return Response(response_text)
        except Exception as e:
            print(f"Error generating response: {e}")
            return Response(f"Error generating response: {str(e)}")
45
def setup_llm():
    """Set up a simple LLM that doesn't require API keys.

    Attempts to build a small local Transformers model first; if that fails
    for any reason, falls back to a dummy object with the same interface.
    """

    class DummyLLM:
        """Last-resort stand-in used when no real model can be loaded."""

        def complete(self, prompt):
            class Response:
                def __init__(self, text):
                    self.text = text

            return Response("This is a dummy response. The actual model couldn't be loaded.")

    try:
        # Try with a very small model first
        return SimpleTransformersLLM("google/flan-t5-small")
    except Exception as e:
        print(f"Error setting up LLM: {e}")
        # Fallback to dummy LLM
        return DummyLLM()