Spaces:

datajoi
/

Dataset-Test-Workflow

Sleeping

App Files Files Community

Mustehson committed on Oct 18, 2024

Commit

5023c74

1 Parent(s): 9d5557c

Langsmith Logs

Browse files

Files changed (2) hide show

app.py +36 -18
requirements.txt +7 -4

app.py CHANGED Viewed

@@ -6,8 +6,10 @@ import pandas as pd
 import pandera as pa
 from pandera import Column
 import ydata_profiling as pp
-from huggingface_hub import InferenceClient
 from prompt import PROMPT_PANDERA, PANDERA_USER_INPUT_PROMPT
 import warnings
 warnings.filterwarnings("ignore", category=DeprecationWarning)
@@ -16,12 +18,9 @@ warnings.filterwarnings("ignore", category=DeprecationWarning)
 TAB_LINES = 8
 # Load Token
 md_token = os.getenv('MD_TOKEN')
-os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')
 INPUT_PROMPT = '''
-Here is the frist few samples of data:
 <Sample Data>
 {data}
 </Sample Data<>
@@ -29,7 +28,7 @@ Here is the frist few samples of data:
 USER_INPUT = '''
-Here is the frist few samples of data:
 <Sample Data>
 {data}
 </Sample Data<>
@@ -44,7 +43,22 @@ Here is the User Description:
 print('Connecting to DB...')
 # Connect to DB
 conn = duckdb.connect(f"md:my_db?motherduck_token={md_token}", read_only=True)
-client = InferenceClient("meta-llama/Meta-Llama-3-70B-Instruct")
 # Get Databases
 def get_schemas():
@@ -71,29 +85,33 @@ def get_data_df(schema):
 def chat_template(system_prompt, user_prompt, df):
-    messages=[
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": user_prompt.format(data=df.head().to_json(orient='records'))},
     ]
     return messages
 def chat_template_user(system_prompt, user_prompt, user_description, df):
-    messages=[
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": user_prompt.format(data=df.head(1).to_json(orient='records'), user_description=user_description)},
     ]
-    return messages
 def run_llm(messages):
   try:
-    response = client.chat_completion(messages, max_tokens=1024)
-    print(response.choices[0].message.content)
-    tests = json.loads(response.choices[0].message.content)
   except Exception as e:
       return e
   return tests
 # Get Schema
 def get_table_schema(table):
     result = conn.sql(f"SELECT sql, database_name, schema_name FROM duckdb_tables() where table_name ='{table}';").df()

 import pandera as pa
 from pandera import Column
 import ydata_profiling as pp
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
 from prompt import PROMPT_PANDERA, PANDERA_USER_INPUT_PROMPT
+from langsmith import traceable
 import warnings
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 TAB_LINES = 8
 # Load Token
 md_token = os.getenv('MD_TOKEN')
 INPUT_PROMPT = '''
+Here are the first few samples of data:
 <Sample Data>
 {data}
 </Sample Data<>
 USER_INPUT = '''
+Here are the first few samples of data:
 <Sample Data>
 {data}
 </Sample Data<>
 print('Connecting to DB...')
 # Connect to DB
 conn = duckdb.connect(f"md:my_db?motherduck_token={md_token}", read_only=True)
+models = ["Qwen/Qwen2.5-72B-Instruct","meta-llama/Meta-Llama-3-70B-Instruct",
+          "meta-llama/Llama-3.1-70B-Instruct"]
+model_loaded = False
+for model in models:
+  try:
+      endpoint = HuggingFaceEndpoint(repo_id=model, max_new_tokens=8192)
+      info = endpoint.client.get_endpoint_info()
+      model_loaded = True
+      break
+  except Exception as e:
+      print(f"Error for model {model}: {e}")
+      continue
+llm = ChatHuggingFace(llm=endpoint).bind_tools(tools=[], max_tokens=8192)
 # Get Databases
 def get_schemas():
 def chat_template(system_prompt, user_prompt, df):
+    messages = [
+        SystemMessage(content=system_prompt),
+        HumanMessage(content=user_prompt.format(data=df.head().to_json(orient='records'))),
     ]
     return messages
 def chat_template_user(system_prompt, user_prompt, user_description, df):
+    messages = [
+        SystemMessage(content=system_prompt),
+        HumanMessage(content=user_prompt.format(data=df.head(1).to_json(orient='records'), user_description=user_description)),
     ]
+    return messages
+@traceable()
 def run_llm(messages):
   try:
+    response = llm.invoke(messages)
+    print(response.content)
+    tests = json.loads(response.content)
   except Exception as e:
       return e
   return tests
 # Get Schema
 def get_table_schema(table):
     result = conn.sql(f"SELECT sql, database_name, schema_name FROM duckdb_tables() where table_name ='{table}';").df()

requirements.txt CHANGED Viewed

@@ -1,7 +1,10 @@
 torch
 huggingface_hub
-accelerate
 transformers==4.44.2
-duckdb
-pandera
-ydata-profiling

 torch
 huggingface_hub
+langchain_huggingface
+accelerate==0.34.2
 transformers==4.44.2
+duckdb==1.1.1
+langsmith==0.1.135
+pandera==0.20.4
+ydata-profiling==v4.11.0
+langchain-core==0.3.12