import os
import time

import gradio as gr
import pandas as pd
import requests
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# =========================
# LOAD GGUF MODEL
# =========================

print("Downloading GGUF model...")

model_path = hf_hub_download(
    # Alternative model builds, kept for reference:
    # repo_id="bartowski/Qwen2.5-1.5B-Instruct-GGUF",
    # filename="Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
    # repo_id="bartowski/Qwen2.5-0.5B-Instruct-GGUF",
    # filename="Qwen2.5-0.5B-Instruct-Q4_K_M.gguf"
    repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
    filename="qwen2.5-1.5b-instruct-q5_k_m.gguf",
)

print("Loading model...")

# NOTE(review): the full prompt (instructions + four tables + question) plus
# max_tokens must fit inside n_ctx; confirm this still holds if $top is raised.
llm = Llama(
    model_path=model_path,
    n_ctx=1024,
    n_threads=2,
    n_batch=512,
    verbose=False,
)

# =========================
# ENV VARIABLES (use HF Secrets ideally)
# =========================

# Credentials are read from the environment first. The literal fallbacks keep
# existing deployments working unchanged.
# NOTE(review): these fallbacks are real-looking credentials committed to
# source -- rotate them and drop the defaults once the secrets are configured.
client_id = os.environ.get("SAP_CLIENT_ID", "sb-cap1-3c4588e0trial-dev!t617058")
client_secret = os.environ.get(
    "SAP_CLIENT_SECRET",
    "acbe78be-ead5-4b12-b3b4-32fdb27d0f5f$hFj-hDXxwHkNHC-CAvv-OKSr3KH96nLL4KqwIg7M8D8=",
)
token_url = os.environ.get(
    "SAP_TOKEN_URL",
    "https://3c4588e0trial.authentication.us10.hana.ondemand.com/oauth/token",
)

cap_service_url_customers = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/Customers?$top=2"
cap_service_url_products = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/Products?$top=2"
cap_service_url_saleorders = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/SalesOrders?$top=2"
cap_service_url_saleorderitems = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/SalesOrderItems?$top=2"

# Upper bound for every outbound HTTP call so a stalled endpoint cannot hang
# the request handler indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# How long fetched SAP data is reused before it is fetched again (5 minutes).
CACHE_TTL_SECONDS = 300

# Columns kept from each entity set (fewer columns -> shorter prompt).
CUSTOMER_COLUMNS = ("ID", "name", "country", "industry")
PRODUCT_COLUMNS = ("ID", "name", "category", "price", "currency")
SALEORDER_COLUMNS = ("ID", "customer_ID", "orderDate", "status")
SALEORDERITEM_COLUMNS = ("ID", "parent_ID", "product_ID", "quantity", "netAmount")

# =========================
# GLOBAL CACHE
# =========================

access_token = None
cached_data = None
last_refresh = 0


# =========================
# TOKEN FUNCTION
# =========================

def generate_token():
    """Request an OAuth token with the client-credentials grant.

    On a 200 response the module-level ``access_token`` is replaced with the
    ``access_token`` field of the JSON body and that value is returned.

    Returns:
        The token string, or ``None`` when the token endpoint answers with a
        non-200 status (the cached token is left untouched in that case) or
        when the response body has no ``access_token`` field.
    """
    global access_token

    response = requests.post(
        token_url,
        data={"grant_type": "client_credentials"},
        auth=(client_id, client_secret),
        timeout=REQUEST_TIMEOUT_SECONDS,
    )

    if response.status_code != 200:
        return None

    access_token = response.json().get("access_token")
    return access_token


# =========================
# FETCH SAP DATA
# =========================

def _require_token():
    """Return a usable bearer token, requesting one if none is cached.

    Raises:
        RuntimeError: if no token could be obtained, so callers never send
            an ``Authorization: Bearer None`` header.
    """
    token = access_token or generate_token()
    if not token:
        raise RuntimeError("Could not obtain an access token for the SAP service.")
    return token


def _authorized_get(url):
    """GET *url* with the bearer token, refreshing the token once on 401/403.

    Raises:
        RuntimeError: if a token cannot be obtained.
        requests.HTTPError: if the final response has an error status.
    """
    global access_token

    headers = {
        "Authorization": f"Bearer {_require_token()}",
        "Accept": "application/json",
    }
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)

    # Retry if token expired
    if response.status_code in (401, 403):
        access_token = None
        headers["Authorization"] = f"Bearer {_require_token()}"
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)

    # Surface HTTP failures here rather than as a KeyError on "value" below.
    response.raise_for_status()
    return response


def _fetch_entity_frame(url, columns):
    """Fetch one OData entity set and return it as a DataFrame of *columns*."""
    records = _authorized_get(url).json()["value"]
    frame = pd.DataFrame(records)
    if frame.empty:
        # A frame built from zero records has no columns to select from;
        # return an empty frame that still carries the expected columns.
        return pd.DataFrame(columns=list(columns))
    return frame[list(columns)]


def fetch_sap_data():
    """Fetch customers, products, sales orders and sales order items.

    Returns:
        A 4-tuple ``(df_customers, df_products, df_saleorders,
        df_saleorderitems)`` of DataFrames, each reduced to the columns
        needed for the prompt.

    Raises:
        RuntimeError: if no OAuth token can be obtained.
        requests.RequestException: on timeout, connection failure or an
            HTTP error status.
        KeyError: if a response lacks ``"value"`` or an expected column.
    """
    df_customers = _fetch_entity_frame(cap_service_url_customers, CUSTOMER_COLUMNS)
    df_products = _fetch_entity_frame(cap_service_url_products, PRODUCT_COLUMNS)
    df_saleorders = _fetch_entity_frame(cap_service_url_saleorders, SALEORDER_COLUMNS)
    df_saleorderitems = _fetch_entity_frame(
        cap_service_url_saleorderitems, SALEORDERITEM_COLUMNS
    )

    return df_customers, df_products, df_saleorders, df_saleorderitems


# =========================
# CACHE FUNCTION
# =========================

def get_cached_data():
    """Return the cached SAP DataFrames, refetching when missing or stale.

    The data is fetched again when nothing is cached yet or when more than
    ``CACHE_TTL_SECONDS`` have passed since the last successful fetch.
    """
    global cached_data, last_refresh

    # Refresh every 5 minutes
    is_stale = time.time() - last_refresh > CACHE_TTL_SECONDS
    if cached_data is None or is_stale:
        cached_data = fetch_sap_data()
        last_refresh = time.time()

    return cached_data


# =========================
# MAIN LLM FUNCTION
# =========================

def generate_response(user_prompt):
    """Answer *user_prompt* with the local model, grounded in the SAP data.

    The four cached DataFrames are rendered as plain-text tables and embedded
    in the prompt together with the answering rules and the user's question.

    Returns:
        The model's stripped completion text, or ``"Error: <message>"`` if
        anything fails while fetching data or running the model.
    """
    # Top-level UI boundary: any failure is reported as text in the output
    # box instead of propagating into Gradio.
    try:
        df_customers, df_products, df_saleorders, df_saleorderitems = get_cached_data()

        # Convert to compact text (IMPORTANT)
        customers_text = df_customers.to_string(index=False)
        products_text = df_products.to_string(index=False)
        saleorders_text = df_saleorders.to_string(index=False)
        saleorderitems_text = df_saleorderitems.to_string(index=False)

        prompt = f"""
Your purpose is to answer the user's questions based strictly on the database records provided to you.

Customers Data:
{customers_text}

Products Data:
{products_text}

Sale orders Data:
{saleorders_text}

Sale order items Data:
{saleorderitems_text}

RULES:
1. NO HALLUCINATIONS: You must base your answer ONLY on the data provided.
2. MISSING DATA: If the provided data does not contain the answer, do not guess. Say: "I could not find that information in the current SAP database."
3. FORMATTING: You must output your response in Markdown. Use bold text for important nouns and bullet points for lists to make it easy to read.
4. TONE: Be concise, highly professional, and helpful.

User: {user_prompt}
Assistant:
"""

        output = llm(
            prompt,
            max_tokens=100,
            temperature=0.3,
            top_p=0.7,
            # Stop as soon as the model starts writing another dialogue turn.
            stop=["User:", "Assistant:"],
        )

        response = output["choices"][0]["text"].strip()
        return response

    except Exception as e:
        return f"Error: {str(e)}"


# =========================
# GRADIO UI
# =========================

with gr.Blocks() as demo:
    user_input = gr.Textbox(label="User Question")
    output = gr.Textbox(label="Response")

    btn = gr.Button("Generate")

    btn.click(
        fn=generate_response,
        inputs=[user_input],
        outputs=output,
        api_name="predict",
    )

# REQUIRED for API exposure
demo.queue()
demo.launch()