Spaces:

dvwn
/

nl2sql-api

Sleeping

App Files Files Community

dvwn committed on May 14

Commit

e06da36

1 Parent(s): a4607e0

hf_engine.py & sql_agent.py version 1.1.0

Browse files

Files changed (2) hide show

backend/src/nl2sql/hf_engine.py +18 -12
backend/src/nl2sql/sql_agent.py +46 -46

backend/src/nl2sql/hf_engine.py CHANGED Viewed

@@ -6,10 +6,6 @@ from langchain_huggingface import HuggingFaceEndpoint
 from langchain_core.language_models.llms import LLM
 from typing import Any, List, Optional
-# Default Model
-# DEFAULT_MODEL_ID = "defog/llama-3-sqlcoder-8b:featherless-ai"
-# DEFAULT_MODEL_ID = "defog/sqlcoder-7b-2"
-# DEFAULT_MODEL_ID = "Qwen/Qwen2.5-Coder-7B-Instruct:featherless-ai"
 # Model Registry: Add several model to be tested
 MODEL_REGISTRY = {
     "defog/sqlcoder-7b-2": "text",
@@ -19,7 +15,7 @@ MODEL_REGISTRY = {
     #"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B:featherless-ai": "chat"
 }
-ACTIVE_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct:featherless-ai"
 # Custom LangChain wrapper for HuggingFace Inference API
 class HFChatWrapper(LLM):
@@ -43,9 +39,13 @@ class HFChatWrapper(LLM):
     @property
     def _llm_type(self) -> str:
         return "huggingface_inference_client"
 # Initialize the HuggingFace endpoint using the InferenceClient
-def get_llm(model_id: str = ACTIVE_MODEL_ID):
     """
     Automatically detects the model type and returns the correct LangChain interface.
     Initializes the HuggingFace InferenceClient and returns an LLM instance for generating SQL queries.
@@ -55,16 +55,22 @@ def get_llm(model_id: str = ACTIVE_MODEL_ID):
     if not hf_token:
         raise ValueError("HuggingFace API token not found!")
-    model_type = MODEL_REGISTRY.get(model_id, "chat")
-    print(f"Initializing HuggingFace InferenceClient with model: {model_id}")
     if model_type == "chat":
         client = InferenceClient(api_key=hf_token)
-        return HFChatWrapper(client=client, model_id=model_id)
     elif model_type == "text":
         # Route to standard Text Generation API
         return HuggingFaceEndpoint(
-            repo_id=model_id,
             task="text-generation",
             max_new_tokens=512,
             temperature=0.0,
@@ -77,7 +83,7 @@ def get_llm(model_id: str = ACTIVE_MODEL_ID):
     # Initialize the HuggingFace InferenceClient
     #client = InferenceClient(api_key=hf_token)
-    #llm = HFChatWrapper(client=client, model_id=model_id)
     #return llm

 from langchain_core.language_models.llms import LLM
 from typing import Any, List, Optional
 # Model Registry: Add several model to be tested
 MODEL_REGISTRY = {
     "defog/sqlcoder-7b-2": "text",
     #"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B:featherless-ai": "chat"
 }
+DEFAULT_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct:featherless-ai"
 # Custom LangChain wrapper for HuggingFace Inference API
 class HFChatWrapper(LLM):
     @property
     def _llm_type(self) -> str:
         return "huggingface_inference_client"
+def get_models() -> List[str]:
+    """Utility to return all model IDs available in the MODEL_REGISTRY."""
+    return list(MODEL_REGISTRY.keys())
 # Initialize the HuggingFace endpoint using the InferenceClient
+def get_llm(model_id: str = DEFAULT_MODEL_ID):
     """
     Automatically detects the model type and returns the correct LangChain interface.
     Initializes the HuggingFace InferenceClient and returns an LLM instance for generating SQL queries.
     if not hf_token:
         raise ValueError("HuggingFace API token not found!")
+    # Determine the model type based on the MODEL_REGISTRY
+    active_model = model_id if model_id else DEFAULT_MODEL_ID
+    if active_model not in MODEL_REGISTRY:
+        print(f"Warning: Model '{active_model}' not found in MODEL_REGISTRY. Defaulting to 'chat' type.")
+    model_type = MODEL_REGISTRY.get(active_model, "chat")
+    print(f"Initializing HuggingFace InferenceClient with model: {active_model}")
     if model_type == "chat":
         client = InferenceClient(api_key=hf_token)
+        return HFChatWrapper(client=client, model_id=active_model)
     elif model_type == "text":
         # Route to standard Text Generation API
         return HuggingFaceEndpoint(
+            repo_id=active_model,
             task="text-generation",
             max_new_tokens=512,
             temperature=0.0,
     # Initialize the HuggingFace InferenceClient
     #client = InferenceClient(api_key=hf_token)
+    #llm = HFChatWrapper(client=client, model_id=active_model)
     #return llm

backend/src/nl2sql/sql_agent.py CHANGED Viewed

@@ -84,7 +84,7 @@ def clean_sql(raw_sql: str) -> str:
     return cleaned.strip()
 # Function to handle NL2SQL conversion
-def nl2sql_agent(user_question: str, max_retries: int = 3) -> dict:
     """
     Complete flow execution with Auto-correction:
     Get Schema context -> Generate SQL query -> Execute SQL query -> If Error, Refine & Retry ->Return results
@@ -94,7 +94,7 @@ def nl2sql_agent(user_question: str, max_retries: int = 3) -> dict:
     schema = get_schema_context(question = user_question)
     # Generate SQL query using the schema context and user question
-    llm = get_llm()
     # LangChain Pipeline: Pipe prompt into LLM
     chain = prompt_template | llm
@@ -107,7 +107,7 @@ def nl2sql_agent(user_question: str, max_retries: int = 3) -> dict:
     # Auto-correction Loop
     for attempt in range(1, max_retries + 1):
         if attempt == 1:
-            print("Generating initial SQL query...")
             raw_response = chain.invoke({
                 "schema": schema,
                 "question": user_question
@@ -122,50 +122,50 @@ def nl2sql_agent(user_question: str, max_retries: int = 3) -> dict:
                 "error_message": error_message
             })
-            # Parse & clean the generated SQL query
-            generated_sql = clean_sql(raw_response)
-            current_sql = generated_sql
-            print(f"Generated SQL: \n{generated_sql}")
-            # Execute the generated SQL query and fetch results
-            connection = get_db_connection()
-            if not connection:
-                return {
-                    "query": generated_sql,
-                    "error": "Could not establish database connection",
-                    "status": "failed"
-                }
-            try:
-                cursor = connection.cursor()
-                cursor.execute(generated_sql)
-                results = cursor.fetchall()
-                if attempt > 1:
-                    print(f"SQL query executed successfully after {attempt} attempts.")
-                # Generate natural language response based on the results
-                print("Generating natural language response based on query results...")
-                nl_response = nl_chain.invoke({
-                    "question": user_question,
-                    "results": str(results)
-                })
-                return {
-                    "query": generated_sql,
-                    "results": results,
-                    "nl_response": nl_response,
-                    "status": "success",
-                    "attempts": attempt
-                }
-            except Exception as e:
-                error_message = str(e)
-                print(f"Error executing SQL: {error_message}")
-                if attempt == max_retries:
-                    print("Max retries reached. Returning error.")
-            finally:
-                connection.close()
     return {
         "query": current_sql,

     return cleaned.strip()
 # Function to handle NL2SQL conversion
+def nl2sql_agent(user_question: str, max_retries: int = 3, model_id: str = None) -> dict:
     """
     Complete flow execution with Auto-correction:
     Get Schema context -> Generate SQL query -> Execute SQL query -> If Error, Refine & Retry ->Return results
     schema = get_schema_context(question = user_question)
     # Generate SQL query using the schema context and user question
+    llm = get_llm(model_id=model_id)
     # LangChain Pipeline: Pipe prompt into LLM
     chain = prompt_template | llm
     # Auto-correction Loop
     for attempt in range(1, max_retries + 1):
         if attempt == 1:
+            print(f"Generating initial SQL query using {model_id or 'default model'}...")
             raw_response = chain.invoke({
                 "schema": schema,
                 "question": user_question
                 "error_message": error_message
             })
+        # Parse & clean the generated SQL query
+        generated_sql = clean_sql(raw_response)
+        current_sql = generated_sql
+        print(f"Generated SQL: \n{generated_sql}")
+        # Execute the generated SQL query and fetch results
+        connection = get_db_connection()
+        if not connection:
+            return {
+                "query": generated_sql,
+                "error": "Could not establish database connection",
+                "status": "failed"
+            }
+        try:
+            cursor = connection.cursor()
+            cursor.execute(generated_sql)
+            results = cursor.fetchall()
+            if attempt > 1:
+                print(f"SQL query executed successfully after {attempt} attempts.")
+            # Generate natural language response based on the results
+            print("Generating natural language response based on query results...")
+            nl_response = nl_chain.invoke({
+                "question": user_question,
+                "results": str(results)
+            })
+            return {
+                "query": generated_sql,
+                "results": results,
+                "nl_response": nl_response,
+                "status": "success",
+                "attempts": attempt
+            }
+        except Exception as e:
+            error_message = str(e)
+            print(f"Error executing SQL: {error_message}")
+            if attempt == max_retries:
+                print("Max retries reached. Returning error.")
+        finally:
+            connection.close()
     return {
         "query": current_sql,