VcRlAgent committed on
Commit
b426cda
·
1 Parent(s): 318caa2

Testing Natural Lang SQL

Browse files
Files changed (4) hide show
  1. HybridJiraRAG.py +89 -0
  2. app.py +52 -1
  3. requirements.txt +11 -15
  4. requirements.txt.bak +15 -0
HybridJiraRAG.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import create_sql_agent
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA

# HuggingFace replacements for the OpenAI LLM/embedding stack.
# HuggingFaceEndpoint is kept importable for the (commented) remote-API option.
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings

# transformers is used directly so the model runs on the local GPU.
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch


class HybridJiraRAG:
    """Hybrid RAG system for HuggingFace Spaces GPU.

    Combines two answer paths over Jira data:
      * a SQL agent over a relational database (structured queries), and
      * a RetrievalQA chain over a FAISS vector store (semantic search).

    NOTE(review): app.py imports this as ``from hybrid_rag import
    HybridJiraRAG`` but this file is named ``HybridJiraRAG.py`` — one of
    the two must be renamed or the import will fail.
    """

    def __init__(
        self,
        sql_db_uri: str,
        vector_store_path: str,
        hf_token: str = None,
        model_name: str = "meta-llama/Llama-3.2-3B-Instruct",
    ):
        """Build the SQL agent, embeddings, vector store and RAG chain.

        Args:
            sql_db_uri: SQLAlchemy URI for the Jira database.
            vector_store_path: Path to a previously saved FAISS index.
            hf_token: HF API token — only used if the remote
                HuggingFaceEndpoint option below is re-enabled; the active
                local-GPU path ignores it.
            model_name: HuggingFace model id to load locally for generation.
        """
        # Option 1: HF Inference API (remote; does not use the Space's GPU)
        # self.llm = HuggingFaceEndpoint(
        #     repo_id=model_name,
        #     huggingfacehub_api_token=hf_token,
        #     temperature=0.1,
        #     max_new_tokens=512,
        # )

        # Option 2: load the model locally on GPU (RECOMMENDED for HF Spaces)
        self.llm = self._load_local_llm(model_name)

        # SQL agent over the Jira database.
        self.sql_db = SQLDatabase.from_uri(sql_db_uri)
        self.sql_agent = create_sql_agent(
            self.llm,
            db=self.sql_db,
            agent_type="zero-shot-react-description",  # more broadly compatible than tool-calling agents
            verbose=True,
        )

        # Embeddings: local sentence-transformers model, on GPU when available.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'},
        )

        # Vector store. SECURITY: allow_dangerous_deserialization unpickles
        # the saved index — only load indexes you created and trust.
        self.vector_store = FAISS.load_local(
            vector_store_path,
            embeddings,
            allow_dangerous_deserialization=True,
        )

        # RAG chain answering from the top-5 retrieved documents.
        self.rag_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=self.vector_store.as_retriever(search_kwargs={"k": 5}),
            return_source_documents=True,
        )

    def _load_local_llm(self, model_name: str):
        """Load *model_name* locally and wrap it for LangChain.

        Returns a HuggingFacePipeline backed by a text-generation pipeline
        in FP16 with device_map="auto" (placed on GPU when one is present).
        """
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,  # FP16 for GPU efficiency
            device_map="auto",          # automatically use GPU
            trust_remote_code=True,
        )

        # Text-generation pipeline with mild sampling to curb repetition.
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=512,
            temperature=0.1,
            do_sample=True,
            top_p=0.95,
            repetition_penalty=1.15,
        )

        # Wrap in LangChain.
        return HuggingFacePipeline(pipeline=pipe)
app.py CHANGED
@@ -4,6 +4,14 @@ from langchain_community.agent_toolkits import create_sql_agent
4
  import pandas as pd
5
  from sqlalchemy import create_engine
6
  from datetime import datetime, timedelta
 
 
 
 
 
 
 
 
7
 
8
  # Sample Jira data structure
9
  jira_data = {
@@ -65,4 +73,47 @@ for q in questions:
65
  print(f"Q: {q}")
66
  print(f"{'='*60}")
67
  result = agent.invoke(q)
68
- print(f"A: {result['output']}\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import pandas as pd
5
  from sqlalchemy import create_engine
6
  from datetime import datetime, timedelta
7
+ from langchain.chains import LLMChain
8
+ from langchain.prompts import PromptTemplate
9
+ import os
10
+ import gradio as gr
11
+ from openai import OpenAI
12
+ from huggingface_hub import InferenceClient
13
+ from HybridJiraRAG import HybridJiraRAG  # the added file is HybridJiraRAG.py; `hybrid_rag` would not resolve
14
+
15
 
16
  # Sample Jira data structure
17
  jira_data = {
 
73
  print(f"Q: {q}")
74
  print(f"{'='*60}")
75
  result = agent.invoke(q)
76
+ print(f"A: {result['output']}\n")
77
+
78
+
79
+
80
# After getting SQL results, format them nicely
def ask_with_formatting(question: str):
    """Answer *question* via SQL, then rephrase the raw rows in prose.

    Relies on module-level `sql_chain`, `execute_query` and `llm`.
    """
    # Step 1: generate the SQL for the question and run it.
    generated_sql = sql_chain.invoke({"question": question})
    query_output = execute_query.invoke(generated_sql)

    # Step 2: have the LLM turn the raw result into a readable answer.
    answer_prompt = PromptTemplate(
        template="""Given the question and SQL result, provide a clear natural language answer.

Question: {question}
SQL Result: {result}

Natural language answer:""",
        input_variables=["question", "result"],
    )

    response = LLMChain(llm=llm, prompt=answer_prompt).invoke(
        {"question": question, "result": query_output}
    )
    return response['text']
104
+
105
# Usage
print(ask_with_formatting("What's the average resolution time for P1 tickets?"))
# Output: "The average resolution time for P1 priority tickets is 36 hours, or approximately 1.5 days."


# Build Gradio UI
# NOTE(review): `ask_llm` is not defined anywhere in this diff — presumably
# it lives elsewhere in app.py; confirm it exists before launch.
demo = gr.Interface(
    fn=ask_llm,
    inputs=gr.Textbox(lines=3, label="Ask the AI"),
    outputs=gr.Textbox(label="Response"),
    title="HF Inference Client LLM Demo",
    description="Powered by HuggingFace InferenceClient SDK.",
)

demo.launch()
requirements.txt CHANGED
@@ -1,15 +1,11 @@
1
- openai>=1.51.0
2
- huggingface-hub>=0.25.0 # only needed if you use InferenceClient later
3
- httpx>=0.27.0 # used internally by OpenAI SDK
4
- python-dotenv>=1.0.1 # if you load HF_TOKEN from .env
5
-
6
- langchain==0.1.0
7
- langchain-community==0.0.13
8
- langchain-huggingface==0.0.1
9
- transformers==4.36.0
10
- torch==2.1.0
11
- sentence-transformers==2.2.2
12
- faiss-cpu==1.7.4 # or faiss-gpu if using GPU for embeddings too
13
- sqlalchemy==2.0.23
14
- accelerate==0.25.0
15
- bitsandbytes==0.41.3 # For quantization (optional)
 
+ # Minimal set that works well together
+ langchain
+ langchain-community
+ langchain-huggingface
+ transformers
+ torch
+ sentence-transformers
+ faiss-cpu
+ sqlalchemy
+ accelerate
+ gradio
+ openai            # still imported by app.py (`from openai import OpenAI`)
+ huggingface-hub   # still imported by app.py (`from huggingface_hub import InferenceClient`)
 
 
 
 
requirements.txt.bak ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai>=1.51.0
2
+ huggingface-hub>=0.25.0 # only needed if you use InferenceClient later
3
+ httpx>=0.27.0 # used internally by OpenAI SDK
4
+ python-dotenv>=1.0.1 # if you load HF_TOKEN from .env
5
+
6
+ langchain==0.1.0
7
+ langchain-community==0.0.13
8
+ langchain-huggingface==0.0.1
9
+ transformers==4.36.0
10
+ torch==2.1.0
11
+ sentence-transformers==2.2.2
12
+ faiss-cpu==1.7.4 # or faiss-gpu if using GPU for embeddings too
13
+ sqlalchemy==2.0.23
14
+ accelerate==0.25.0
15
+ bitsandbytes==0.41.3 # For quantization (optional)