| # import gradio as gr | |
| # import os | |
| # import time | |
| # import requests | |
| # import concurrent.futures | |
| # import json | |
| # import re | |
| # from huggingface_hub import hf_hub_download | |
| # from llama_cpp import Llama | |
| # # ========================= | |
| # # ENV VARIABLES | |
| # # ========================= | |
| # client_id = os.getenv("sap_client_id") | |
| # client_secret = os.getenv("sap_client_secret") | |
| # token_url = os.getenv("sap_token_url") | |
| # urls = { | |
| # "customers": os.getenv("sap_cap_service_url_customers"), | |
| # "products": os.getenv("sap_cap_service_url_products"), | |
| # "orders": os.getenv("sap_cap_service_url_salesorders"), | |
| # "order_items": os.getenv("sap_cap_service_url_salesorderitems"), | |
| # } | |
| # # ========================= | |
| # # LOAD POWERFUL 3B MODEL | |
| # # ========================= | |
| # # This model is 6x larger than the 0.5B and much smarter at reasoning | |
| # model_path = hf_hub_download( | |
| # repo_id="Qwen/Qwen2.5-Coder-3B-Instruct-GGUF", | |
| # filename="qwen2.5-coder-3b-instruct-q4_k_m.gguf" | |
| # ) | |
| # llm = Llama( | |
| # model_path=model_path, | |
| # n_ctx=2048, | |
| # n_threads=4 # Increased threads for the larger model | |
| # ) | |
| # # ========================= | |
| # # CACHE & DATA FETCHING | |
| # # ========================= | |
| # access_token = None | |
| # data_cache = {} | |
| # last_refresh = 0 | |
| # def get_token(): | |
| # global access_token | |
| # if not token_url: return | |
| # try: | |
| # res = requests.post(token_url, data={"grant_type": "client_credentials"}, auth=(client_id, client_secret), timeout=10) | |
| # access_token = res.json().get("access_token") | |
| # except Exception as e: | |
| # print(f"Auth Error: {e}") | |
| # def fetch_data(): | |
| # global data_cache, last_refresh | |
| # if time.time() - last_refresh < 600 and data_cache: | |
| # return data_cache | |
| # get_token() | |
| # headers = {"Authorization": f"Bearer {access_token}"} | |
| # def fetch(url): | |
| # try: | |
| # r = requests.get(url, headers=headers, timeout=10) | |
| # return r.json().get("value", []) | |
| # except: return [] | |
| # with concurrent.futures.ThreadPoolExecutor() as executor: | |
| # futures = {k: executor.submit(fetch, v) for k, v in urls.items()} | |
| # data_cache = {k: f.result() for k, f in futures.items()} | |
| # last_refresh = time.time() | |
| # return data_cache | |
| # # ========================= | |
| # # SMART LLM QUERY GENERATOR | |
| # # ========================= | |
| # def generate_query(user_prompt): | |
| # prompt = f"""<|im_start|>system | |
| # You are an SAP Data Expert. Convert user requests into a JSON query. | |
| # Rules: | |
| # 1. If the user wants a list/table, use "type": "select". | |
| # 2. If the user wants totals, counts, or "top" spending, use "type": "aggregation". | |
| # 3. Tables: "customers", "products", "orders", "order_items". | |
| # Output format: | |
| # {{ | |
| # "type": "select" | "aggregation", | |
| # "table": "string", | |
| # "limit": number | |
| # }} | |
| # <|im_end|> | |
| # <|im_start|>user | |
| # {user_prompt} | |
| # <|im_end|> | |
| # <|im_start|>assistant | |
| # """ | |
| # output = llm(prompt, max_tokens=128, temperature=0.1, stop=["<|im_end|>"]) | |
| # text = output["choices"][0]["text"].strip() | |
| # # Extract JSON using regex (handles model chatter) | |
| # match = re.search(r'(\{.*\})', text, re.DOTALL) | |
| # if match: | |
| # try: | |
| # return json.loads(match.group(1)) | |
| # except: | |
| # return None | |
| # return None | |
| # # ========================= | |
| # # ENHANCED EXECUTION ENGINE | |
| # # ========================= | |
| # def execute_query(q, data): | |
| # if not q: | |
| # return "System could not parse your request into a valid query." | |
| # query_type = q.get("type") | |
| # table_name = q.get("table") | |
| # limit = q.get("limit", 10) | |
| # try: | |
| # # AGGREGATION LOGIC (Totals/Summing) | |
| # if query_type == "aggregation": | |
| # items = data.get("order_items", []) | |
| # orders = {o['ID']: o for o in data.get("orders", [])} | |
| # customers = {c['ID']: c for c in data.get("customers", [])} | |
| # summary = {} | |
| # for item in items: | |
| # order = orders.get(item.get("parent_ID")) | |
| # if order: | |
| # cust = customers.get(order.get("customer_ID")) | |
| # name = cust.get("name", "Unknown") if cust else "Unknown Customer" | |
| # amount = float(item.get("netAmount", 0)) | |
| # summary[name] = summary.get(name, 0) + amount | |
| # sorted_res = sorted(summary.items(), key=lambda x: x[1], reverse=True) | |
| # return "\n".join([f"{name}: ${amt:,.2f}" for name, amt in sorted_res[:limit]]) | |
| # # SELECT LOGIC (Listing data) | |
| # elif query_type == "select": | |
| # rows = data.get(table_name, []) | |
| # if not rows: return f"No data found in {table_name}." | |
| # # Clean up the output for better readability in the UI | |
| # formatted_list = [] | |
| # for row in rows[:limit]: | |
| # # If it's an order, try to make it readable | |
| # if table_name == "orders": | |
| # formatted_list.append(f"Order ID: {row.get('ID')} | Date: {row.get('createdAt', 'N/A')} | Status: {row.get('lifecycleStatus', 'N/A')}") | |
| # else: | |
| # formatted_list.append(str(row)) | |
| # return "\n".join(formatted_list) | |
| # except Exception as e: | |
| # return f"Execution Error: {str(e)}" | |
| # return "Invalid query parameters generated by LLM." | |
| # # ========================= | |
| # # GRADIO UI | |
| # # ========================= | |
| # def main_process(user_prompt): | |
| # data = fetch_data() | |
| # query_obj = generate_query(user_prompt) | |
| # print(f"DEBUG: Logic identified as: {query_obj}") | |
| # return execute_query(query_obj, data) | |
| # with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo: | |
| # gr.Markdown("# 🚀 Enterprise SAP AI Assistant") | |
| # with gr.Row(): | |
| # with gr.Column(): | |
| # inp = gr.Textbox(placeholder="Try: 'List all orders' or 'Who are my top customers?'", label="Query") | |
| # btn = gr.Button("Execute", variant="primary") | |
| # with gr.Column(): | |
| # out = gr.Textbox(label="Result", lines=15) | |
| # btn.click(main_process, inputs=inp, outputs=out) | |
| # demo.launch(server_name="0.0.0.0") | |
| # ==================================================================================== | |
| # import gradio as gr | |
| # import os | |
| # import time | |
| # import requests | |
| # import json | |
| # import re | |
| # import numpy as np | |
| # import faiss | |
| # from huggingface_hub import hf_hub_download | |
| # from llama_cpp import Llama | |
| # from sentence_transformers import SentenceTransformer | |
| # # ========================= | |
| # # CONFIG & MODEL LOAD | |
| # # ========================= | |
| # # Using the 3B model as the "Brain" and SentenceTransformer as the "Searcher" | |
| # model_path = hf_hub_download( | |
| # repo_id="Qwen/Qwen2.5-Coder-3B-Instruct-GGUF", | |
| # filename="qwen2.5-coder-3b-instruct-q4_k_m.gguf" | |
| # ) | |
| # llm = Llama(model_path=model_path, n_ctx=2048, n_threads=4) | |
| # embed_model = SentenceTransformer('all-MiniLM-L6-v2') | |
| # # SAP URLs (Placeholders) | |
| # urls = { | |
| # "customers": os.getenv("sap_cap_service_url_customers"), | |
| # "orders": os.getenv("sap_cap_service_url_salesorders"), | |
| # } | |
| # # ========================= | |
| # # DATA FETCHING | |
| # # ========================= | |
| # def fetch_all_data(): | |
| # # In a real scenario, use your Auth token logic here | |
| # # For now, this combines all text data for the RAG engine | |
| # all_docs = [] | |
| # # Mock retrieval for demonstration - Replace with your fetch_data() logic | |
| # raw_data = { | |
| # "customers": [{"name": "Acme Corp", "city": "Berlin"}, {"name": "Stark Ind", "city": "NY"}], | |
| # "orders": [{"ID": "101", "date": "2023-10-01", "total": 500}] | |
| # } | |
| # for table, rows in raw_data.items(): | |
| # for row in rows: | |
| # all_docs.append(f"Table: {table} | Data: {json.dumps(row)}") | |
| # return all_docs | |
| # # ========================= | |
| # # RAG ENGINE (The "Filter") | |
| # # ========================= | |
| # def get_relevant_context(query, documents): | |
| # if not documents: return "" | |
| # # 1. Create Embeddings | |
| # doc_embeddings = embed_model.encode(documents) | |
| # query_embedding = embed_model.encode([query]) | |
| # # 2. Setup Vector DB (FAISS) | |
| # dimension = doc_embeddings.shape[1] | |
| # index = faiss.IndexFlatL2(dimension) | |
| # index.add(np.array(doc_embeddings).astype('float32')) | |
| # # 3. Search for top 3 most relevant rows | |
| # D, I = index.search(np.array(query_embedding).astype('float32'), k=3) | |
| # retrieved_context = "\n".join([documents[i] for i in I[0]]) | |
| # return retrieved_context | |
| # # ========================= | |
| # # MAIN PROCESS | |
| # # ========================= | |
| # def rag_process(user_prompt): | |
| # # Step 1: Get all data | |
| # documents = fetch_all_data() | |
| # # Step 2: RAG Filtering (Retrieve only what matters) | |
| # context = get_relevant_context(user_prompt, documents) | |
| # # Step 3: LLM Generation (Only looks at filtered data) | |
| # prompt = f"""<|im_start|>system | |
| # You are an SAP assistant. Use the provided context to answer the user request. | |
| # Context: | |
| # {context} | |
| # <|im_end|> | |
| # <|im_start|>user | |
| # {user_prompt} | |
| # <|im_end|> | |
| # <|im_start|>assistant | |
| # """ | |
| # output = llm(prompt, max_tokens=256, temperature=0.1, stop=["<|im_end|>"]) | |
| # return output["choices"][0]["text"].strip() | |
| # # ========================= | |
| # # UI | |
| # # ========================= | |
| # demo = gr.Interface( | |
| # fn=rag_process, | |
| # inputs=gr.Textbox( | |
| # lines=2, | |
| # placeholder="Ask about SAP data (e.g., Find customers in Berlin)" | |
| # ), | |
| # outputs="text", | |
| # title="SAP Assistant", | |
| # description="Ask questions about SAP data using RAG" | |
| # ) | |
| # demo.launch() | |
| # ============================================================================================================= | |
| import gradio as gr | |
| from llama_cpp import Llama | |
| from huggingface_hub import hf_hub_download | |
| import requests | |
| import os | |
| import pandas as pd | |
| import time | |
| # ========================= | |
| # ENV VARIABLES | |
| # ========================= | |
| client_id = "sb-cap1-3c4588e0trial-dev!t617058" | |
| client_secret = "acbe78be-ead5-4b12-b3b4-32fdb27d0f5f$hFj-hDXxwHkNHC-CAvv-OKSr3KH96nLL4KqwIg7M8D8=" | |
| token_url = "https://3c4588e0trial.authentication.us10.hana.ondemand.com/oauth/token" | |
| cap_service_url_customers = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/Customers?$top=2" | |
| cap_service_url_products = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/Products?$top=2" | |
| cap_service_url_saleorders = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/SalesOrders?$top=2" | |
| cap_service_url_saleorderitems = "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales/SalesOrderItems?$top=2" | |
| # ========================= | |
| # GLOBAL VARIABLES | |
| # ========================= | |
| access_token = None | |
| cached_customers = None | |
| cached_products = None | |
| cached_salesorders = None | |
| cached_salesorderitems = None | |
| last_refresh = 0 | |
| # ========================= | |
| # LOAD GGUF MODEL (once) | |
| # ========================= | |
| print("Downloading/Locating GGUF model...") | |
| model_path = hf_hub_download( | |
| repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF", | |
| filename="qwen2.5-1.5b-instruct-q4_k_m.gguf" | |
| ) | |
| print("Loading model into Llama CPP...") | |
| llm = Llama( | |
| model_path=model_path, | |
| n_ctx=4096, # Increased context window to ensure SAP data fits | |
| n_threads=None, # Automatically uses maximum available CPU threads | |
| verbose=False # Set to True if you want to see inference speed logs | |
| ) | |
| # ========================= | |
| # TOKEN FUNCTION | |
| # ========================= | |
| def generate_sap_xsuaa_token(): | |
| global access_token | |
| print("Generating SAP token...") | |
| auth_response = requests.post( | |
| token_url, | |
| data={"grant_type": "client_credentials"}, | |
| auth=(client_id, client_secret) | |
| ) | |
| if auth_response.status_code != 200: | |
| print("Token Error:", auth_response.text) | |
| return None | |
| access_token = auth_response.json().get("access_token") | |
| print("Token generated!") | |
| return access_token | |
| # ========================= | |
| # FETCH SAP DATA | |
| # ========================= | |
| def fetch_sap_data(): | |
| global access_token | |
| if not access_token: | |
| generate_sap_xsuaa_token() | |
| headers = { | |
| "Authorization": f"Bearer {access_token}", | |
| "Accept": "application/json" | |
| } | |
| res1 = requests.get(cap_service_url_customers, headers=headers) | |
| res2 = requests.get(cap_service_url_products, headers=headers) | |
| res3 = requests.get(cap_service_url_saleorders, headers=headers) | |
| res4 = requests.get(cap_service_url_saleorderitems, headers=headers) | |
| # Retry if token expired | |
| if res1.status_code in [400, 401, 403]: | |
| print("Token expired. Regenerating...") | |
| access_token = None | |
| generate_sap_xsuaa_token() | |
| headers["Authorization"] = f"Bearer {access_token}" | |
| res1 = requests.get(cap_service_url_customers, headers=headers) | |
| res2 = requests.get(cap_service_url_products, headers=headers) | |
| res3 = requests.get(cap_service_url_saleorders, headers=headers) | |
| res4 = requests.get(cap_service_url_saleorderitems, headers=headers) | |
| df_customers = pd.DataFrame(res1.json().get("value", [])) | |
| df_products = pd.DataFrame(res2.json().get("value", [])) | |
| df_saleorders = pd.DataFrame(res3.json().get("value", [])) | |
| df_saleorderitems = pd.DataFrame(res4.json().get("value", [])) | |
| # Keep only important columns if they exist | |
| if not df_customers.empty: df_customers = df_customers[["ID", "name", "country", "industry"]] | |
| if not df_products.empty: df_products = df_products[["ID", "name", "category", "price", "currency"]] | |
| if not df_saleorders.empty: df_saleorders = df_saleorders[["ID", "customer_ID", "orderDate", "status"]] | |
| if not df_saleorderitems.empty: df_saleorderitems = df_saleorderitems[["ID", "parent_ID", "product_ID", "quantity", "netAmount"]] | |
| return df_customers, df_products, df_saleorders, df_saleorderitems | |
| # ========================= | |
| # CACHE LOGIC | |
| # ========================= | |
| def get_cached_data(): | |
| global cached_customers, cached_products, cached_salesorders, cached_salesorderitems, last_refresh | |
| # Refresh every 5 minutes (300 seconds, not 3000) | |
| if time.time() - last_refresh > 300 or cached_customers is None: | |
| print("Refreshing SAP data...") | |
| cached_customers, cached_products, cached_salesorders, cached_salesorderitems = fetch_sap_data() | |
| last_refresh = time.time() | |
| return cached_customers, cached_products, cached_salesorders, cached_salesorderitems | |
| # ========================= | |
| # MAIN FUNCTION (LLM) | |
| # ========================= | |
| def generate_response(user_prompt): | |
| try: | |
| # Get cached SAP data | |
| df_customers, df_products, df_saleorders, df_saleorderitems = get_cached_data() | |
| # Reduce size | |
| customers_text = df_customers.to_string(index=False) if not df_customers.empty else "No Data" | |
| products_text = df_products.to_string(index=False) if not df_products.empty else "No Data" | |
| saleorders_text = df_saleorders.to_string(index=False) if not df_saleorders.empty else "No Data" | |
| saleorderitems_text = df_saleorderitems.to_string(index=False) if not df_saleorderitems.empty else "No Data" | |
| # Build system prompt | |
| system_prompt = f""" | |
| You are an intelligent Corporate SAP Assistant bot. | |
| Your sole purpose is to answer the user's questions based strictly on the database records provided to you. | |
| Customers Data: {customers_text} | |
| Products Data: {products_text} | |
| Sale orders Data: {saleorders_text} | |
| Sale order items Data: {saleorderitems_text} | |
| CRITICAL RULES: | |
| 1. NO HALLUCINATIONS: You must base your answer ONLY on the data provided. | |
| 2. MISSING DATA: If the provided data does not contain the answer, do not guess. Say: "I could not find that information in the current SAP database." | |
| 3. FORMATTING: You must output your response in Markdown. Use bold text for important nouns and bullet points for lists to make it easy to read. | |
| 4. TONE: Be concise, highly professional, and helpful. | |
| """ | |
| messages = [ | |
| {"role": "system", "content": system_prompt}, | |
| {"role": "user", "content": user_prompt} | |
| ] | |
| # Generate response using llama-cpp-python | |
| response = llm.create_chat_completion( | |
| messages=messages, | |
| max_tokens=150, | |
| temperature=0.2, | |
| ) | |
| # Extract the content from the response dictionary | |
| generated_text = response["choices"][0]["message"]["content"].strip() | |
| return generated_text | |
| except Exception as e: | |
| return f"Error: {str(e)}" | |
| # ========================= | |
| # GRADIO UI + API | |
| # ========================= | |
| with gr.Blocks() as demo: | |
| user_input = gr.Textbox(label="User Question") | |
| output = gr.Textbox(label="Response") | |
| btn = gr.Button("Generate") | |
| btn.click( | |
| fn=generate_response, | |
| inputs=[user_input], | |
| outputs=output, | |
| api_name="predict" | |
| ) | |
| # REQUIRED for API exposure | |
| demo.queue() | |
| demo.launch() |