translators-will committed on
Commit
ac4f597
·
verified ·
1 Parent(s): 4ae41b2

Update data_clean_final.py

Browse files
Files changed (1) hide show
  1. data_clean_final.py +9 -8
data_clean_final.py CHANGED
@@ -7,12 +7,13 @@ from ctransformers import AutoModelForCausalLM
7
  import torch
8
 
9
  # Load local TinyLlama model
10
- model_name = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = AutoModelForCausalLM.from_pretrained(model_name,
13
- model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
14
- )
15
- generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
 
16
 
17
 
18
  # Function to get data cleaning suggestions from LLM
@@ -34,8 +35,8 @@ def suggest_llm_fixes_and_fill(column_name, examples):
34
  )
35
 
36
  try:
37
- response = generator(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
38
- return response[0]['generated_text'].split(prompt)[-1].strip()
39
 
40
  except Exception as e:
41
  error_message = f"LLM for error column {column_name}: {str(e)}"
 
7
  import torch
8
 
9
  # Load local TinyLlama model
10
+ llm = AutoModelForCausalLM.from_pretrained(
11
+ "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
12
+ model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", # Update this to match your GGUF file name
13
+ model_type="llama",
14
+ gpu_layers=0 # Adjust for GPU support if available
15
+ )
16
+ # generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
17
 
18
 
19
  # Function to get data cleaning suggestions from LLM
 
35
  )
36
 
37
  try:
38
+ response = llm(prompt)
39
+ return response.strip()
40
 
41
  except Exception as e:
42
  error_message = f"LLM for error column {column_name}: {str(e)}"