Spaces:

Roxanne-WANG
/

LangSQL

Paused

App Files Files Community

Roxanne-WANG committed on Apr 20

Commit

6670a17

1 Parent(s): b053c71

update model

Browse files

Files changed (2) hide show

app.py +0 -54
text2sql.py +2 -2

app.py CHANGED Viewed

@@ -76,60 +76,6 @@ from utils.db_utils import add_a_record
 from langdetect.lang_detect_exception import LangDetectException
 import os
-# Suppress excessive warnings from Hugging Face transformers library
-hf_logging.set_verbosity_error()
-# SchemaItemClassifierInference class for loading the Hugging Face model
-class SchemaItemClassifierInference:
-    def __init__(self, model_name: str, token=None):
-        """
-        model_name: Hugging Face repository path, e.g., "Roxanne-WANG/LangSQL"
-        token: Authentication token for Hugging Face (if the model is private)
-        """
-        # Load the tokenizer and model from Hugging Face, trust remote code if needed
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            model_name,
-            use_auth_token=token,  # Pass the token for accessing private models
-            trust_remote_code=True  # Trust custom model code from Hugging Face repo
-        )
-        self.model = AutoModelForSequenceClassification.from_pretrained(
-            model_name,
-            use_auth_token=token,
-            trust_remote_code=True
-        )
-    def predict(self, text: str):
-        # Tokenize the input text and get predictions from the model
-        inputs = self.tokenizer(
-            text,
-            return_tensors="pt",
-            padding=True,
-            truncation=True
-        )
-        outputs = self.model(**inputs)
-        return outputs.logits
-# ChatBot class that interacts with SchemaItemClassifierInference
-class ChatBot:
-    def __init__(self):
-        # Specify the Hugging Face model name (replace with your model's path)
-        model_name = "Roxanne-WANG/LangSQL"
-        hf_token = os.getenv('HF_TOKEN')  # Get token from environment variables
-        if hf_token is None:
-            raise ValueError("Hugging Face token is required. Please set HF_TOKEN.")
-        # Initialize the schema item classifier with Hugging Face token
-        self.sic = SchemaItemClassifierInference(model_name, token=hf_token)
-    def get_response(self, question: str, db_id: str):
-        # Get the model's prediction (logits) for the input question
-        logits = self.sic.predict(question)
-        # For now, return logits as a placeholder for the actual SQL query
-        return logits
 # -------- Streamlit Web Application --------
 text2sql_bot = ChatBot()
 baidu_api_token = None  # Your Baidu API token (if needed for translation)

 from langdetect.lang_detect_exception import LangDetectException
 import os
 # -------- Streamlit Web Application --------
 text2sql_bot = ChatBot()
 baidu_api_token = None  # Your Baidu API token (if needed for translation)

text2sql.py CHANGED Viewed

@@ -104,14 +104,14 @@ def get_db_id2ddl(db_path):
 class ChatBot():
     def __init__(self) -> None:
         os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-        model_name = "seeklhy/codes-7b-merged"
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
         self.model = AutoModelForCausalLM.from_pretrained(model_name, device_map = "auto", torch_dtype = torch.float16)
         self.max_length = 4096
         self.max_new_tokens = 256
         self.max_prefix_length = self.max_length - self.max_new_tokens
-        self.sic = SchemaItemClassifierInference("sic_ckpts/sic_bird")
         self.db_id2content_searcher = dict()
         for db_id in os.listdir("db_contents_index"):

 class ChatBot():
     def __init__(self) -> None:
         os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+        model_name = "seeklhy/codes-1b"
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
         self.model = AutoModelForCausalLM.from_pretrained(model_name, device_map = "auto", torch_dtype = torch.float16)
         self.max_length = 4096
         self.max_new_tokens = 256
         self.max_prefix_length = self.max_length - self.max_new_tokens
+        self.sic = SchemaItemClassifierInference("Roxanne-WANG/LangSQL", token=os.getenv('HF_TOKEN'))
         self.db_id2content_searcher = dict()
         for db_id in os.listdir("db_contents_index"):