Spaces:

VcRlAgent
/

TestLLMGen

Runtime error

App Files Files Community

VcRlAgent committed on Nov 16, 2025

Commit

b426cda

1 Parent(s): 318caa2

Testing Natural Lang SQL

Browse files

Files changed (4) hide show

HybridJiraRAG.py +89 -0
app.py +52 -1
requirements.txt +11 -15
requirements.txt.bak +15 -0

HybridJiraRAG.py ADDED Viewed

	@@ -0,0 +1,89 @@

+from langchain_community.utilities import SQLDatabase
+from langchain_community.agent_toolkits import create_sql_agent
+from langchain_community.vectorstores import FAISS
+from langchain.chains import RetrievalQA
+from langchain.prompts import PromptTemplate
+# ✅ REPLACE OpenAI with HuggingFace models
+from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
+# Alternative: Use transformers directly for more control
+from langchain_community.llms import HuggingFacePipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch
+class HybridJiraRAG:
+    """Hybrid RAG system for HuggingFace Spaces GPU"""
+    def __init__(
+        self,
+        sql_db_uri: str,
+        vector_store_path: str,
+        hf_token: str = None,
+        model_name: str = "meta-llama/Llama-3.2-3B-Instruct"
+    ):
+        # Option 1: Use HF Inference API (doesn't use your GPU)
+        # self.llm = HuggingFaceEndpoint(
+        #     repo_id=model_name,
+        #     huggingfacehub_api_token=hf_token,
+        #     temperature=0.1,
+        #     max_new_tokens=512
+        # )
+        # Option 2: Load model locally on GPU (RECOMMENDED for HF Spaces)
+        self.llm = self._load_local_llm(model_name)
+        # SQL Agent
+        self.sql_db = SQLDatabase.from_uri(sql_db_uri)
+        self.sql_agent = create_sql_agent(
+            self.llm,
+            db=self.sql_db,
+            agent_type="zero-shot-react-description",  # More compatible
+            verbose=True
+        )
+        # Embeddings - Use local HuggingFace model
+        embeddings = HuggingFaceEmbeddings(
+            model_name="sentence-transformers/all-MiniLM-L6-v2",
+            model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
+        )
+        # Vector store
+        self.vector_store = FAISS.load_local(
+            vector_store_path,
+            embeddings,
+            allow_dangerous_deserialization=True
+        )
+        # RAG chain
+        self.rag_chain = RetrievalQA.from_chain_type(
+            llm=self.llm,
+            retriever=self.vector_store.as_retriever(search_kwargs={"k": 5}),
+            return_source_documents=True
+        )
+    def _load_local_llm(self, model_name: str):
+        """Load LLM locally to use GPU"""
+        # Load model on GPU
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,  # Use FP16 for GPU efficiency
+            device_map="auto",  # Automatically use GPU
+            trust_remote_code=True
+        )
+        # Create text generation pipeline
+        pipe = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=512,
+            temperature=0.1,
+            do_sample=True,
+            top_p=0.95,
+            repetition_penalty=1.15
+        )
+        # Wrap in LangChain
+        return HuggingFacePipeline(pipeline=pipe)

app.py CHANGED Viewed

@@ -4,6 +4,14 @@ from langchain_community.agent_toolkits import create_sql_agent
 import pandas as pd
 from sqlalchemy import create_engine
 from datetime import datetime, timedelta
 # Sample Jira data structure
 jira_data = {
@@ -65,4 +73,47 @@ for q in questions:
     print(f"Q: {q}")
     print(f"{'='*60}")
     result = agent.invoke(q)
-    print(f"A: {result['output']}\n")

 import pandas as pd
 from sqlalchemy import create_engine
 from datetime import datetime, timedelta
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+import os
+import gradio as gr
+# NOTE(review): `openai` is not listed in the updated requirements.txt of this
+# commit - confirm it is installed, otherwise this import fails at startup.
+from openai import OpenAI
+from huggingface_hub import InferenceClient
+# NOTE(review): the module added in this commit is HybridJiraRAG.py; confirm a
+# `hybrid_rag` module exists, otherwise this should import from `HybridJiraRAG`.
+from hybrid_rag import HybridJiraRAG
 # Sample Jira data structure
 jira_data = {
     print(f"Q: {q}")
     print(f"{'='*60}")
     result = agent.invoke(q)
+    print(f"A: {result['output']}\n")
+# After getting SQL results, format them nicely
+def ask_with_formatting(question: str):
+    # Generate and execute SQL
+    sql = sql_chain.invoke({"question": question})
+    raw_result = execute_query.invoke(sql)
+    # Format result in natural language
+    format_prompt = PromptTemplate(
+        template="""Given the question and SQL result, provide a clear natural language answer.
+        Question: {question}
+        SQL Result: {result}
+        Natural language answer:""",
+                input_variables=["question", "result"]
+    )
+    format_chain = LLMChain(llm=llm, prompt=format_prompt)
+    formatted = format_chain.invoke({
+        "question": question,
+        "result": raw_result
+    })
+    return formatted['text']
+# Usage
+# NOTE(review): this call sits at module top level, so it runs (SQL generation,
+# query and LLM formatting) before the Gradio UI below is built and launched.
+print(ask_with_formatting("What's the average resolution time for P1 tickets?"))
+# Output: "The average resolution time for P1 priority tickets is 36 hours, or approximately 1.5 days."
+# Build Gradio UI
+# One multi-line text box in, one text box out, both wired to `ask_llm`.
+# NOTE(review): `ask_llm` is not defined in the visible part of this diff -
+# confirm it exists earlier in app.py, and whether `ask_with_formatting` was
+# the intended handler given the title/description still describe the
+# InferenceClient demo.
+demo = gr.Interface(
+    fn=ask_llm,
+    inputs=gr.Textbox(lines=3, label="Ask the AI"),
+    outputs=gr.Textbox(label="Response"),
+    title="HF Inference Client LLM Demo",
+    description="Powered by HuggingFace InferenceClient SDK."
+)
+# Start the Gradio app.
+demo.launch()

requirements.txt CHANGED Viewed

@@ -1,15 +1,11 @@
-openai>=1.51.0
-huggingface-hub>=0.25.0   # only needed if you use InferenceClient later
-httpx>=0.27.0             # used internally by OpenAI SDK
-python-dotenv>=1.0.1      # if you load HF_TOKEN from .env
-langchain==0.1.0
-langchain-community==0.0.13
-langchain-huggingface==0.0.1
-transformers==4.36.0
-torch==2.1.0
-sentence-transformers==2.2.2
-faiss-cpu==1.7.4  # or faiss-gpu if using GPU for embeddings too
-sqlalchemy==2.0.23
-accelerate==0.25.0
-bitsandbytes==0.41.3  # For quantization (optional)

+# Minimal set that works well together
+langchain
+langchain-community
+langchain-huggingface
+transformers
+torch
+sentence-transformers
+faiss-cpu
+sqlalchemy
+accelerate
+gradio

requirements.txt.bak ADDED Viewed

	@@ -0,0 +1,15 @@

+openai>=1.51.0
+huggingface-hub>=0.25.0   # only needed if you use InferenceClient later
+httpx>=0.27.0             # used internally by OpenAI SDK
+python-dotenv>=1.0.1      # if you load HF_TOKEN from .env
+langchain==0.1.0
+langchain-community==0.0.13
+langchain-huggingface==0.0.1
+transformers==4.36.0
+torch==2.1.0
+sentence-transformers==2.2.2
+faiss-cpu==1.7.4  # or faiss-gpu if using GPU for embeddings too
+sqlalchemy==2.0.23
+accelerate==0.25.0
+bitsandbytes==0.41.3  # For quantization (optional)