Spaces:

orachamp1981
/

oracle-llm

Sleeping

App Files Files Community

orachamp1981 committed on Jun 26, 2025

Commit

53d89cd

verified ·

1 Parent(s): 9762962

Upload 4 files

Browse files

Files changed (3) hide show

data_loader.py +27 -5
model.py +34 -28
sql_templates.py +2 -2

data_loader.py CHANGED Viewed

@@ -1,10 +1,32 @@
 # data_loader.py
 def load_rules(file_path="data/train_data.txt"):
     data = {}
-    with open(file_path, "r", encoding="utf-8") as file:
-        for line in file:
-            if "=" in line:
-                key, value = line.strip().split("=", 1)
-                data[key.strip().lower()] = value.strip()
     return data

 # data_loader.py
+import os
 def load_rules(file_path="data/train_data.txt"):
     """Load ``prompt = response`` rules from a plain-text file.

     Every line that contains ``=`` is split on the FIRST ``=`` only, so the
     response part may itself contain ``=`` (e.g. a SQL ``WHERE`` clause).
     Keys are stripped and lower-cased; values are stripped. Lines without
     ``=`` are ignored, as are lines whose key is empty (``=value``), because
     an empty prompt can never be looked up meaningfully. When a key occurs
     more than once, the last occurrence wins.

     Args:
         file_path: Path of the UTF-8 encoded rule file.

     Returns:
         A dict mapping normalized prompt text to its response. The dict is
         empty when the file does not exist.
     """
     rules = {}
     try:
         # Open directly instead of checking os.path.exists() first: the file
         # could disappear between the check and the open.
         handle = open(file_path, "r", encoding="utf-8")
     except FileNotFoundError:
         # A missing rule file simply means "no rules".
         return rules
     with handle:
         for raw_line in handle:
             key, separator, value = raw_line.partition("=")
             key = key.strip().lower()
             if separator and key:
                 rules[key] = value.strip()
     return rules
def detect_domain(prompt):
    """Pick the domain rule file that matches keywords in *prompt*.

    Domains are tried in a fixed priority order (finance, then HR, then
    sales); the first one with a keyword hit wins. Matching is
    case-insensitive and anchored at the START of a word, so plurals and
    inflections still match ("sales", "orders", "employees") while
    accidental hits inside unrelated words do not ("hr" in "three" or
    "through", "order" in "border", "sale" in "wholesale"). The two-letter
    keyword "hr" must be a whole word.

    Args:
        prompt: The user's request text. May be empty or ``None``.

    Returns:
        The relative path of the matching rule file, or ``None`` when no
        domain keyword is found.
    """
    import re  # local import keeps this function self-contained

    if not prompt:
        return None

    # (rule file, pattern) pairs in priority order.
    domain_patterns = (
        ("data/finance.txt", r"\b(?:salary|financial|transaction|ledger)"),
        ("data/hr.txt", r"\b(?:employee|hiring|hr\b)"),
        ("data/sales.txt", r"\b(?:sale|customer|order)"),
    )
    text = prompt.lower()
    for rules_path, pattern in domain_patterns:
        if re.search(pattern, text):
            return rules_path
    return None
def load_rules_by_domain(prompt):
    """Return the exact-match rule for *prompt* from its domain rule file.

    The domain file is chosen by ``detect_domain``; its rules are read with
    ``load_rules``, which already yields an empty dict when the file is
    missing, so no separate existence check is needed here.

    Args:
        prompt: The user's request text.

    Returns:
        The stored response for the prompt, or ``None`` when no domain is
        detected or the domain file has no entry for this prompt. The caller
        is expected to fall back to its other matching strategies.
    """
    domain_file = detect_domain(prompt)
    if not domain_file:
        return None
    # load_rules stores keys stripped and lower-cased; normalize the lookup
    # key the same way so "Show Salary " still finds "show salary".
    lookup_key = prompt.strip().lower()
    return load_rules(domain_file).get(lookup_key)

model.py CHANGED Viewed

@@ -1,68 +1,75 @@
 from sentence_transformers import SentenceTransformer, util
 from sql_templates import sql_templates, sql_keyword_aliases, fuzzy_aliases, conflicting_phrases, greeting_templates
-from data_loader import load_rules
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-# 📘 Load rules
-rules = load_rules()
 # 🔍 Load semantic model
 model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
-train_prompts = list(rules.keys())
-train_embeddings = model.encode(train_prompts, convert_to_tensor=True)
 # 🤖 Load local LLM model
 llm_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 tokenizer = AutoTokenizer.from_pretrained(llm_name)
-llm_model = AutoModelForCausalLM.from_pretrained(llm_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
-llm_pipeline = pipeline("text-generation", model=llm_model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
 def oracle_sql_suggester(prompt):
     prompt_clean = prompt.strip().lower()
-    # ✅ Exact rule
-    if prompt_clean in rules:
-        return rules[prompt_clean]
-    # ✅ Greeting handling
     for greet_key, greet_reply in greeting_templates.items():
         if greet_key in prompt_clean:
             return greet_reply
-    # ✅ Conflicting phrase
     for terms, response in conflicting_phrases.items():
         if all(term in prompt_clean for term in terms):
             return response
-    # ✅ Keyword alias
     for word in prompt_clean.split():
         if word in sql_keyword_aliases:
             mapped_key = sql_keyword_aliases[word]
             return sql_templates.get(mapped_key)
-    # ✅ Template match
     for key, template in sql_templates.items():
         if key in prompt_clean or key.replace("_", " ") in prompt_clean:
             return template
-    # ✅ Fuzzy match
     for fuzzy_phrase, mapped_key in fuzzy_aliases.items():
         if fuzzy_phrase in prompt_clean:
             return sql_templates.get(mapped_key)
-    # ✅ Semantic match
-    user_embedding = model.encode(prompt_clean, convert_to_tensor=True)
-    cosine_scores = util.cos_sim(user_embedding, train_embeddings)
-    top_match_index = torch.argmax(cosine_scores).item()
-    top_score = cosine_scores[0][top_match_index].item()
-    if top_score >= 0.7:
-        matched_prompt = train_prompts[top_match_index]
-        return rules[matched_prompt]
-    # ✅ Local LLM fallback
     try:
         prompt_text = f"Generate an Oracle SQL query or guidance for the following request:\n{prompt}\n\nSQL:"
         output = llm_pipeline(prompt_text, max_new_tokens=256, do_sample=True, temperature=0.5)[0]["generated_text"]
@@ -70,4 +77,3 @@ def oracle_sql_suggester(prompt):
     except Exception as e:
         print("⚠️ Local LLM error:", e)
         return "🤖 Sorry, I couldn’t process that locally. Please try a simpler prompt."

 from sentence_transformers import SentenceTransformer, util
 from sql_templates import sql_templates, sql_keyword_aliases, fuzzy_aliases, conflicting_phrases, greeting_templates
+from data_loader import load_rules, load_rules_by_domain, detect_domain
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 # 🔍 Load semantic model
 model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
 # 🤖 Load local LLM model
 llm_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 tokenizer = AutoTokenizer.from_pretrained(llm_name)
+llm_model = AutoModelForCausalLM.from_pretrained(
+    llm_name,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+)
+llm_pipeline = pipeline(
+    "text-generation",
+    model=llm_model,
+    tokenizer=tokenizer,
+    device=0 if torch.cuda.is_available() else -1
+)
+# ✅ Load global training rules once for semantic match
+global_rules = load_rules("data/train_data.txt")
+train_prompts = list(global_rules.keys())
+train_embeddings = model.encode(train_prompts, convert_to_tensor=True) if train_prompts else None
 def oracle_sql_suggester(prompt):
     prompt_clean = prompt.strip().lower()
+    # ✅ Step 1: Exact match in domain-specific rules
+    domain_match = load_rules_by_domain(prompt_clean)
+    if domain_match:
+        #return domain_match
+        return domain_match.replace("\\n", "\n")
+    # ✅ Step 2: Check hardcoded greeting or conflict response
     for greet_key, greet_reply in greeting_templates.items():
         if greet_key in prompt_clean:
             return greet_reply
     for terms, response in conflicting_phrases.items():
         if all(term in prompt_clean for term in terms):
             return response
+    # ✅ Step 3: Aliases and fuzzy matching
     for word in prompt_clean.split():
         if word in sql_keyword_aliases:
             mapped_key = sql_keyword_aliases[word]
             return sql_templates.get(mapped_key)
     for key, template in sql_templates.items():
         if key in prompt_clean or key.replace("_", " ") in prompt_clean:
             return template
     for fuzzy_phrase, mapped_key in fuzzy_aliases.items():
         if fuzzy_phrase in prompt_clean:
             return sql_templates.get(mapped_key)
+    # ✅ Step 4: Semantic match against full train_data.txt
+    if train_embeddings is not None and len(train_embeddings) > 0:
+        user_embedding = model.encode(prompt_clean, convert_to_tensor=True)
+        cosine_scores = util.cos_sim(user_embedding, train_embeddings)
+        top_match_index = torch.argmax(cosine_scores).item()
+        top_score = cosine_scores[0][top_match_index].item()
+        if top_score >= 0.7:
+            matched_prompt = train_prompts[top_match_index]
+            return global_rules[matched_prompt].replace("\\n", "\n")  # ⬅️ Support multiline
+    # ✅ Step 5: LLM Fallback
     try:
         prompt_text = f"Generate an Oracle SQL query or guidance for the following request:\n{prompt}\n\nSQL:"
         output = llm_pipeline(prompt_text, max_new_tokens=256, do_sample=True, temperature=0.5)[0]["generated_text"]
     except Exception as e:
         print("⚠️ Local LLM error:", e)
         return "🤖 Sorry, I couldn’t process that locally. Please try a simpler prompt."

sql_templates.py CHANGED Viewed

@@ -1,3 +1,5 @@
 from collections import defaultdict
 sql_templates = {
@@ -23,7 +25,6 @@ sql_keyword_aliases = {
     "delete": "delete"
 }
-# 🧠 NEW fuzzy aliases
 fuzzy_aliases = {
     "grouped result": "group_by",
     "combine tables": "join_example",
@@ -46,7 +47,6 @@ conflicting_phrases = {
     ("delete", "new"): "⚠️ You cannot delete something that doesn't exist yet.",
 }
-# 🤖 Greeting phrases and responses
 greeting_templates = {
     "hello": "👋 Hello! How can I assist you with SQL or PL/SQL today?",
     "hi": "👋 Hi there! Need help with Oracle SQL or PL/SQL?",

+# sql_templates.py
 from collections import defaultdict
 sql_templates = {
     "delete": "delete"
 }
 fuzzy_aliases = {
     "grouped result": "group_by",
     "combine tables": "join_example",
     ("delete", "new"): "⚠️ You cannot delete something that doesn't exist yet.",
 }
 greeting_templates = {
     "hello": "👋 Hello! How can I assist you with SQL or PL/SQL today?",
     "hi": "👋 Hi there! Need help with Oracle SQL or PL/SQL?",