# Spc3 / app.py
# Vedant104's picture
# Update app.py
# 1a7c909 verified
import os
import time

import gradio as gr
import pandas as pd
import requests
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# =========================
# LOAD GGUF MODEL
# =========================
print("Downloading GGUF model...")
model_path = hf_hub_download(
# repo_id="bartowski/Qwen2.5-1.5B-Instruct-GGUF",
# filename="Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
# repo_id="bartowski/Qwen2.5-0.5B-Instruct-GGUF",
# filename="Qwen2.5-0.5B-Instruct-Q4_K_M.gguf"
repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
filename="qwen2.5-1.5b-instruct-q5_k_m.gguf"
)
print("Loading model...")
llm = Llama(
model_path=model_path,
n_ctx=1024,
n_threads=2,
n_batch=512,
verbose=False
)
# =========================
# ENV VARIABLES (use HF Secrets ideally)
# =========================
client_id = "sb-cap1-3c4588e0trial-dev!t617058"
client_secret = "acbe78be-ead5-4b12-b3b4-32fdb27d0f5f$hFj-hDXxwHkNHC-CAvv-OKSr3KH96nLL4KqwIg7M8D8="
token_url = "https://3c4588e0trial.authentication.us10.hana.ondemand.com/oauth/token"
cap_service_url_customers = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/Customers?$top=2"
cap_service_url_products = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/Products?$top=2"
cap_service_url_saleorders = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/SalesOrders?$top=2"
cap_service_url_saleorderitems = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/SalesOrderItems?$top=2"
# =========================
# GLOBAL CACHE
# =========================
access_token = None
cached_data = None
last_refresh = 0
# =========================
# TOKEN FUNCTION
# =========================
def generate_token():
global access_token
response = requests.post(
token_url,
data={"grant_type": "client_credentials"},
auth=(client_id, client_secret)
)
if response.status_code != 200:
return None
access_token = response.json().get("access_token")
return access_token
# =========================
# FETCH SAP DATA
# =========================
def fetch_sap_data():
global access_token
if not access_token:
generate_token()
headers = {
"Authorization": f"Bearer {access_token}",
"Accept": "application/json"
}
res1 = requests.get(cap_service_url_customers, headers=headers)
res2 = requests.get(cap_service_url_products, headers=headers)
res3 = requests.get(cap_service_url_saleorders, headers=headers)
res4 = requests.get(cap_service_url_saleorderitems, headers=headers)
# Retry if token expired
if res1.status_code in [401, 403]:
access_token = None
generate_token()
headers["Authorization"] = f"Bearer {access_token}"
res1 = requests.get(cap_service_url_customers, headers=headers)
res2 = requests.get(cap_service_url_products, headers=headers)
res3 = requests.get(cap_service_url_saleorders, headers=headers)
res4 = requests.get(cap_service_url_saleorderitems, headers=headers)
df_customers = pd.DataFrame(res1.json()["value"])
df_products = pd.DataFrame(res2.json()["value"])
df_saleorders = pd.DataFrame(res3.json()["value"])
df_saleorderitems = pd.DataFrame(res4.json()["value"])
# Reduce columns (IMPORTANT for speed)
df_customers = df_customers[["ID","name","country","industry"]]
df_products = df_products[["ID","name","category","price","currency"]]
df_saleorders = df_saleorders[["ID","customer_ID","orderDate","status"]]
df_saleorderitems = df_saleorderitems[["ID","parent_ID","product_ID","quantity","netAmount"]]
return df_customers, df_products, df_saleorders, df_saleorderitems
# =========================
# CACHE FUNCTION
# =========================
def get_cached_data():
global cached_data, last_refresh
# Refresh every 5 minutes
if time.time() - last_refresh > 3000 or cached_data is None:
cached_data = fetch_sap_data()
last_refresh = time.time()
return cached_data
# =========================
# MAIN LLM FUNCTION
# =========================
def generate_response(user_prompt):
try:
df_customers, df_products, df_saleorders, df_saleorderitems = get_cached_data()
# Convert to compact text (IMPORTANT)
customers_text = df_customers.to_string(index=False)
products_text = df_products.to_string(index=False)
saleorders_text = df_saleorders.to_string(index=False)
saleorderitems_text = df_saleorderitems.to_string(index=False)
prompt = f"""
Your purpose is to answer the user's questions based strictly on the database records provided to you.
Customers Data: {customers_text}
Products Data: {products_text}
Sale orders Data: {saleorders_text}
Sale order items Data: {saleorderitems_text}
RULES:
1. NO HALLUCINATIONS: You must base your answer ONLY on the data provided.
2. MISSING DATA: If the provided data does not contain the answer, do not guess. Say: "I could not find that information in the current SAP database."
3. FORMATTING: You must output your response in Markdown. Use bold text for important nouns and bullet points for lists to make it easy to read.
4. TONE: Be concise, highly professional, and helpful.
User: {user_prompt}
Assistant:
"""
output = llm(
prompt,
max_tokens=100,
temperature=0.3,
top_p=0.7,
stop=["User:", "Assistant:"]
)
response = output["choices"][0]["text"].strip()
return response
except Exception as e:
return f"Error: {str(e)}"
# =========================
# GRADIO UI
# =========================
with gr.Blocks() as demo:
user_input = gr.Textbox(label="User Question")
output = gr.Textbox(label="Response")
btn = gr.Button("Generate")
btn.click(
fn=generate_response,
inputs=[user_input],
outputs=output,
api_name="predict"
)
# REQUIRED for API exposure
demo.queue()
demo.launch()