File size: 6,263 Bytes
2eb9abd
 
 
 
 
 
 
 
 
 
 
 
 
35ea8c9
 
1a7c909
 
 
 
 
 
2eb9abd
 
 
 
 
 
 
35ea8c9
2eb9abd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68b5f94
2eb9abd
68b5f94
 
 
 
2eb9abd
68b5f94
 
 
 
 
2eb9abd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import os
import time

import gradio as gr
import pandas as pd
import requests
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# =========================
# LOAD GGUF MODEL
# =========================
print("Downloading GGUF model...")

# Alternative GGUF builds kept for reference (repo -> file):
#   bartowski/Qwen2.5-1.5B-Instruct-GGUF -> Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
#   bartowski/Qwen2.5-0.5B-Instruct-GGUF -> Qwen2.5-0.5B-Instruct-Q4_K_M.gguf
GGUF_REPO_ID = "Qwen/Qwen2.5-1.5B-Instruct-GGUF"
GGUF_FILENAME = "qwen2.5-1.5b-instruct-q5_k_m.gguf"

# Local filesystem path of the downloaded model file.
model_path = hf_hub_download(repo_id=GGUF_REPO_ID, filename=GGUF_FILENAME)

print("Loading model...")

# Module-level llama.cpp handle shared by every request.
llm = Llama(
    model_path=model_path,
    n_ctx=1024,
    n_threads=2,
    n_batch=512,
    verbose=False,
)

# =========================
# ENV VARIABLES (use HF Secrets ideally)
# =========================
# Every connection setting can be supplied through an environment variable
# (for example a Hugging Face Space secret). The literals below are fallbacks
# that keep existing deployments working unchanged.
# SECURITY: the fallback client secret is committed in source. Rotate it and
# remove the fallbacks once the environment variables are configured.
client_id = os.environ.get("SAP_CLIENT_ID", "sb-cap1-3c4588e0trial-dev!t617058")
client_secret = os.environ.get(
    "SAP_CLIENT_SECRET",
    "acbe78be-ead5-4b12-b3b4-32fdb27d0f5f$hFj-hDXxwHkNHC-CAvv-OKSr3KH96nLL4KqwIg7M8D8=",
)
token_url = os.environ.get(
    "SAP_TOKEN_URL",
    "https://3c4588e0trial.authentication.us10.hana.ondemand.com/oauth/token",
)

# Root of the OData v4 "sales" service; the entity-set URLs are derived from it.
_cap_service_root = os.environ.get(
    "SAP_CAP_SERVICE_URL",
    "https://3c4588e0trial-dev-cap1-srv.cfapps.us10-001.hana.ondemand.com/odata/v4/sales",
).rstrip("/")

# "$top=2" asks the service for at most two rows per entity set.
cap_service_url_customers = f"{_cap_service_root}/Customers?$top=2"
cap_service_url_products = f"{_cap_service_root}/Products?$top=2"
cap_service_url_saleorders = f"{_cap_service_root}/SalesOrders?$top=2"
cap_service_url_saleorderitems = f"{_cap_service_root}/SalesOrderItems?$top=2"

# =========================
# GLOBAL CACHE
# =========================
access_token = None  # current OAuth bearer token, set by generate_token()
cached_data = None   # last result of fetch_sap_data()
last_refresh = 0     # time.time() stamp of the last successful fetch

# =========================
# TOKEN FUNCTION
# =========================
def generate_token():
    """Request a fresh OAuth access token via the client-credentials grant.

    POSTs to ``token_url`` using HTTP basic auth built from ``client_id`` and
    ``client_secret``. On a 200 response the token is stored in the
    module-level ``access_token`` and returned.

    Returns:
        The value of ``access_token`` from the JSON response, or ``None``
        when the endpoint answers with a non-200 status (the module-level
        ``access_token`` is left untouched in that case).

    Raises:
        requests.RequestException: On connection failures or when the
            endpoint does not answer within the timeout.
    """
    global access_token

    response = requests.post(
        token_url,
        data={"grant_type": "client_credentials"},
        auth=(client_id, client_secret),
        # Without a timeout a stalled token endpoint blocks the caller forever.
        timeout=30,
    )

    if response.status_code != 200:
        return None

    access_token = response.json().get("access_token")
    return access_token

# =========================
# FETCH SAP DATA
# =========================
def _get_entity_sets(urls, token):
    """GET every URL in *urls* with *token* as bearer; return responses in order."""
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
    }
    # The timeout keeps a stalled service from blocking the caller forever.
    return [requests.get(url, headers=headers, timeout=30) for url in urls]


def _to_frame(response, columns):
    """Convert one OData collection response into a DataFrame of *columns*."""
    # Surface HTTP failures explicitly instead of as a KeyError on "value".
    response.raise_for_status()
    records = response.json()["value"]
    if not records:
        # An empty entity set has no columns to select from; return an empty
        # frame that still carries the expected column names.
        return pd.DataFrame(columns=columns)
    return pd.DataFrame(records)[columns]


def fetch_sap_data():
    """Fetch customers, products, sales orders and order items from SAP.

    Requests the four entity-set URLs with the cached bearer token. If any
    response is 401 or 403, the token is regenerated once and all four
    requests are repeated. Each JSON payload's ``"value"`` list becomes a
    DataFrame reduced to the columns used in the prompt.

    Returns:
        A tuple ``(df_customers, df_products, df_saleorders,
        df_saleorderitems)``.

    Raises:
        RuntimeError: If no access token could be obtained.
        requests.HTTPError: If a response still has an error status after
            the retry.
        requests.RequestException: On connection failures or timeouts.
        KeyError: If a payload lacks ``"value"`` or an expected column.
    """
    global access_token

    # (url, columns to keep), in the order of the returned tuple.
    sources = (
        (cap_service_url_customers, ["ID", "name", "country", "industry"]),
        (cap_service_url_products, ["ID", "name", "category", "price", "currency"]),
        (cap_service_url_saleorders, ["ID", "customer_ID", "orderDate", "status"]),
        (
            cap_service_url_saleorderitems,
            ["ID", "parent_ID", "product_ID", "quantity", "netAmount"],
        ),
    )
    urls = [url for url, _ in sources]

    if not access_token:
        generate_token()
    if not access_token:
        # Fail early rather than sending "Bearer None" to the service.
        raise RuntimeError("Could not obtain an SAP access token")

    responses = _get_entity_sets(urls, access_token)

    # Retry once with a fresh token if any request was rejected.
    if any(resp.status_code in (401, 403) for resp in responses):
        access_token = None
        generate_token()
        if not access_token:
            raise RuntimeError("Could not refresh the SAP access token")
        responses = _get_entity_sets(urls, access_token)

    df_customers, df_products, df_saleorders, df_saleorderitems = (
        _to_frame(resp, columns)
        for resp, (_, columns) in zip(responses, sources)
    )
    return df_customers, df_products, df_saleorders, df_saleorderitems

# =========================
# CACHE FUNCTION
# =========================
def get_cached_data(max_age_seconds=3000):
    """Return the SAP DataFrames, refetching when the cache is empty or stale.

    Args:
        max_age_seconds: Maximum age of the cached result, in seconds, before
            ``fetch_sap_data()`` is called again. Defaults to 3000 (50
            minutes), the threshold that was previously hard-coded.

    Returns:
        The value last returned by ``fetch_sap_data()``.
    """
    global cached_data, last_refresh

    # NOTE(review): the earlier comment here said "every 5 minutes", but the
    # threshold has always been 3000 seconds. That behaviour is preserved;
    # pass max_age_seconds=300 if five minutes is what was intended.
    cache_age = time.time() - last_refresh
    if cached_data is None or cache_age > max_age_seconds:
        cached_data = fetch_sap_data()
        last_refresh = time.time()

    return cached_data

# =========================
# MAIN LLM FUNCTION
# =========================
def generate_response(user_prompt):
    """Answer a user question from the cached SAP records using the local LLM.

    The four cached DataFrames are rendered as plain text, embedded in an
    instruction prompt together with the question, and passed to ``llm``.

    Args:
        user_prompt: The question typed by the user.

    Returns:
        The model's stripped completion text, a short hint when the question
        is empty, or ``"Error: <message>"`` if fetching the data or running
        the model raised an exception.
    """
    # An empty question would still cost a SAP round-trip and an inference.
    if user_prompt is None or not str(user_prompt).strip():
        return "Please enter a question."

    try:
        df_customers, df_products, df_saleorders, df_saleorderitems = get_cached_data()

        # Render the tables without the index column to keep the prompt short.
        customers_text = df_customers.to_string(index=False)
        products_text = df_products.to_string(index=False)
        saleorders_text = df_saleorders.to_string(index=False)
        saleorderitems_text = df_saleorderitems.to_string(index=False)

        prompt = f"""
        Your purpose is to answer the user's questions based strictly on the database records provided to you.
        
        Customers Data: {customers_text}
        Products Data: {products_text}
        Sale orders Data: {saleorders_text}
        Sale order items Data: {saleorderitems_text}
        
        RULES:
        1. NO HALLUCINATIONS: You must base your answer ONLY on the data provided. 
        2. MISSING DATA: If the provided data does not contain the answer, do not guess. Say: "I could not find that information in the current SAP database."
        3. FORMATTING: You must output your response in Markdown. Use bold text for important nouns and bullet points for lists to make it easy to read.
        4. TONE: Be concise, highly professional, and helpful.
        
        User: {user_prompt}
        Assistant:
        """

        completion = llm(
            prompt,
            max_tokens=100,
            temperature=0.3,
            top_p=0.7,
            # Stop as soon as the model starts writing another dialogue turn.
            stop=["User:", "Assistant:"],
        )

        return completion["choices"][0]["text"].strip()

    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error: {e}"

# =========================
# GRADIO UI
# =========================
with gr.Blocks() as demo:
    # One question box, one answer box, and a button that connects them.
    question_box = gr.Textbox(label="User Question")
    answer_box = gr.Textbox(label="Response")
    generate_button = gr.Button("Generate")

    # api_name="predict" is the endpoint name given to this click handler.
    generate_button.click(
        fn=generate_response,
        inputs=[question_box],
        outputs=answer_box,
        api_name="predict",
    )

# Enable the request queue before launching (original note: "REQUIRED for API
# exposure").
demo.queue()

demo.launch()