translators-will committed on
Commit
ac4f597
·
verified ·
1 Parent(s): 4ae41b2

Update data_clean_final.py

Browse files
Files changed (1) hide show
  1. data_clean_final.py +9 -8
data_clean_final.py CHANGED
@@ -7,12 +7,13 @@ from ctransformers import AutoModelForCausalLM
7
  import torch
8
 
9
  # Load local TinyLlama model
10
- model_name = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = AutoModelForCausalLM.from_pretrained(model_name,
13
- model_file = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
14
- )
15
- generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
 
16
 
17
 
18
  # Function to get data cleaning suggestions from LLM
@@ -34,8 +35,8 @@ def suggest_llm_fixes_and_fill(column_name, examples):
34
  )
35
 
36
  try:
37
- response = generator(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
38
- return response[0]['generated_text'].split(prompt)[-1].strip()
39
 
40
  except Exception as e:
41
  error_message = f"LLM for error column {column_name}: {str(e)}"
 
7
  import torch
8
 
9
  # Load local TinyLlama model
10
+ llm = AutoModelForCausalLM.from_pretrained(
11
+ "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
12
+ model_file="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", # Update this to match your GGUF file name
13
+ model_type="llama",
14
+ gpu_layers=0 # Adjust for GPU support if available
15
+ )
16
+ # generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
17
 
18
 
19
  # Function to get data cleaning suggestions from LLM
 
35
  )
36
 
37
  try:
38
+ response = llm(prompt)
39
+ return response.strip()
40
 
41
  except Exception as e:
42
  error_message = f"LLM for error column {column_name}: {str(e)}"