PD03 committed on
Commit
393aa68
·
verified ·
1 Parent(s): fa59ff8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -108
app.py CHANGED
@@ -1,12 +1,11 @@
1
- # Setup Hugging Face Transformers for LLAMA3
2
  import os
3
  import requests
4
  import json
5
  import gradio as gr
6
  from typing import List, Dict, Any, Optional
7
  import logging
8
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
9
- import torch
10
 
11
  # Setup logging
12
  logging.basicConfig(level=logging.INFO)
@@ -14,100 +13,111 @@ logger = logging.getLogger(__name__)
14
 
15
  # Configuration - Set these as environment variables in Hugging Face Spaces
16
  SAP_API_KEY = os.getenv('SAP_API_KEY') # Set in Space secrets
17
- HF_TOKEN = os.getenv('HF_API_TOKEN') # Set in Space secrets for private models
18
  SAP_BASE_URL = "https://sandbox.api.sap.com/s4hanacloud/sap/opu/odata/sap"
19
 
20
- # Initialize LLAMA3 model
21
- MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct" # or "meta-llama/Meta-Llama-3-70B-Instruct" for larger model
 
22
 
23
  class LLAMA3Client:
24
- def __init__(self):
 
 
 
 
 
 
 
 
 
 
 
 
25
  try:
26
- # Initialize tokenizer and model
27
- logger.info("Loading LLAMA3 model...")
28
- self.tokenizer = AutoTokenizer.from_pretrained(
29
- MODEL_NAME,
30
- token=HF_TOKEN,
31
- trust_remote_code=True
32
- )
33
-
34
- # Use GPU if available
35
- device = "cuda" if torch.cuda.is_available() else "cpu"
36
- logger.info(f"Using device: {device}")
37
-
38
- self.model = AutoModelForCausalLM.from_pretrained(
39
- MODEL_NAME,
40
- token=HF_TOKEN,
41
- torch_dtype=torch.float16 if device == "cuda" else torch.float32,
42
- device_map="auto" if device == "cuda" else None,
43
- trust_remote_code=True,
44
- low_cpu_mem_usage=True
45
- )
46
-
47
- # Create text generation pipeline
48
- self.generator = pipeline(
49
- "text-generation",
50
- model=self.model,
51
- tokenizer=self.tokenizer,
52
- torch_dtype=torch.float16 if device == "cuda" else torch.float32,
53
- device_map="auto" if device == "cuda" else None
54
- )
55
-
56
- logger.info("LLAMA3 model loaded successfully")
57
-
58
  except Exception as e:
59
- logger.error(f"Error loading LLAMA3 model: {e}")
60
- # Fallback to smaller model or API-based approach
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  try:
62
- self.generator = pipeline(
63
- "text-generation",
64
- model="microsoft/DialoGPT-medium",
65
- tokenizer="microsoft/DialoGPT-medium"
 
66
  )
67
- logger.info("Fallback model loaded")
68
- except:
69
- self.generator = None
70
- logger.error("Failed to load any model")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- def generate_response(self, prompt: str, max_length: int = 1000, temperature: float = 0.1) -> str:
73
- """Generate response using LLAMA3"""
74
- if not self.generator:
75
- return "Model not available. Please check configuration."
76
 
77
- try:
78
- # Format prompt for LLAMA3 instruction format
79
- formatted_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
80
 
81
- You are a helpful SAP data analyst. Provide clear, concise answers based on the provided data.<|eot_id|><|start_header_id|>user<|end_header_id|>
82
 
83
  {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
84
 
85
  """
86
-
87
- # Generate response
88
- outputs = self.generator(
89
- formatted_prompt,
90
- max_length=max_length,
91
- temperature=temperature,
92
- do_sample=True,
93
- top_p=0.9,
94
- num_return_sequences=1,
95
- pad_token_id=self.tokenizer.eos_token_id,
96
- eos_token_id=self.tokenizer.eos_token_id
97
  )
98
 
99
- # Extract generated text
100
- generated_text = outputs[0]['generated_text']
101
-
102
- # Extract only the assistant's response
103
- if "<|start_header_id|>assistant<|end_header_id|>" in generated_text:
104
- response = generated_text.split("<|start_header_id|>assistant<|end_header_id|>")[-1]
105
- response = response.replace("<|eot_id|>", "").strip()
106
  else:
107
- response = generated_text[len(formatted_prompt):].strip()
108
-
109
- return response if response else "I couldn't generate a proper response. Please try rephrasing your question."
110
-
111
  except Exception as e:
112
  logger.error(f"Error generating response: {e}")
113
  return f"I encountered an error while processing your question: {str(e)}"
@@ -137,7 +147,7 @@ class SAPDataFetcher:
137
  logger.error(f"JSON decode error: {e}")
138
  return None
139
 
140
- def fetch_sales_orders(self, top: int = 50) -> List[Dict]:
141
  """Fetch sales orders with error handling"""
142
  url = f"{SAP_BASE_URL}/API_SALES_ORDER_SRV/A_SalesOrder?$top={top}&$inlinecount=allpages"
143
  data = self._make_request(url)
@@ -163,7 +173,7 @@ class SAPDataFetcher:
163
  logger.error("Failed to fetch sales orders or invalid response format")
164
  return []
165
 
166
- def fetch_purchase_orders(self, top: int = 50) -> List[Dict]:
167
  """Fetch purchase order headers"""
168
  url = f"{SAP_BASE_URL}/API_PURCHASEORDER_PROCESS_SRV/A_PurchaseOrder?$top={top}&$inlinecount=allpages"
169
  data = self._make_request(url)
@@ -195,7 +205,7 @@ class SAPDataFetcher:
195
  """Fetch purchase order items for given order numbers"""
196
  all_items = []
197
 
198
- for po_number in purchase_orders[:10]: # Limit to first 10 to avoid timeout
199
  url = f"{SAP_BASE_URL}/API_PURCHASEORDER_PROCESS_SRV/A_PurchaseOrderItem?$filter=PurchaseOrder eq '{po_number}'"
200
  data = self._make_request(url)
201
 
@@ -281,7 +291,7 @@ Answer only "yes" or "no" """
281
  # Check if item details are needed
282
  if self.needs_item_details(question) and po_headers:
283
  logger.info("Fetching item-level details")
284
- po_numbers = [po["PurchaseOrder"] for po in po_headers if po["PurchaseOrder"]]
285
  po_items = self.data_fetcher.fetch_purchase_order_items(po_numbers)
286
  context["items"] = po_items
287
  data_type = "Purchase Orders with Item Details"
@@ -302,10 +312,10 @@ Answer only "yes" or "no" """
302
 
303
  def generate_response(self, question: str, context: Dict, data_type: str) -> str:
304
  """Generate response using LLAMA3"""
305
- # Limit context size to prevent token overflow
306
  context_str = json.dumps(context, indent=2)
307
- if len(context_str) > 4000: # Smaller limit for LLAMA3
308
- context_str = context_str[:4000] + "... (truncated)"
309
 
310
  prompt = f"""Data Type: {data_type}
311
 
@@ -319,24 +329,28 @@ Instructions:
319
  2. Include specific numbers, dates, or values when relevant
320
  3. If the data doesn't contain enough information to answer fully, mention this
321
  4. Format your response in a user-friendly way
322
- 5. If there are multiple records, summarize key insights"""
323
 
324
  try:
325
- return self.llama_client.generate_response(prompt, max_length=800, temperature=0.1)
326
  except Exception as e:
327
  logger.error(f"Error generating response: {e}")
328
  return f"I encountered an error while processing your question: {str(e)}"
329
 
330
  # Initialize the system
331
  try:
332
- llama_client = LLAMA3Client()
333
- if SAP_API_KEY:
334
- data_fetcher = SAPDataFetcher(SAP_API_KEY)
335
- sap_agent = SAPAgent(data_fetcher, llama_client)
336
- logger.info("SAP Agent initialized successfully")
337
- else:
338
- logger.warning("SAP_API_KEY not found. Demo mode enabled.")
339
  sap_agent = None
 
 
 
 
 
 
 
 
 
340
  except Exception as e:
341
  logger.error(f"Failed to initialize SAP Agent: {e}")
342
  sap_agent = None
@@ -345,21 +359,30 @@ except Exception as e:
345
  def chat_with_sap(message, history):
346
  """Handle chat interactions"""
347
  if not sap_agent:
348
- return history + [("System", "SAP Agent not initialized. Please check your API key configuration in Space secrets.")]
349
 
350
  if not message.strip():
351
  return history
352
 
353
  try:
354
- response = sap_agent.process_query(message)
355
  history = history or []
356
- history.append((message, response))
357
- return history
 
 
 
 
 
 
358
  except Exception as e:
359
  error_msg = f"Error processing your request: {str(e)}"
360
  history = history or []
361
- history.append((message, error_msg))
362
- return history
 
 
 
363
 
364
  def clear_chat():
365
  return []
@@ -367,20 +390,24 @@ def clear_chat():
367
  # Create Gradio interface
368
  with gr.Blocks(title="SAP Order Analytics Agent with LLAMA3") as demo:
369
  gr.Markdown("""
370
- # 🚀 SAP Order Analytics Agent (Powered by LLAMA3)
371
 
372
- This AI agent uses Meta's LLAMA3 model to help you analyze SAP Sales and Purchase Orders. Ask questions like:
373
  - "How many sales orders do we have?"
374
  - "What's the total value of all purchase orders?"
375
- - "Show me recent purchase orders from supplier X"
376
- - "What are the top materials by quantity?"
377
 
378
- **Note:** Make sure to set your `SAP_API_KEY` and `HF_TOKEN` in the Space secrets.
 
 
 
379
  """)
380
 
381
  chatbot = gr.Chatbot(
382
  height=500,
383
- placeholder="Ask me anything about your SAP orders..."
 
384
  )
385
 
386
  with gr.Row():
@@ -403,4 +430,4 @@ with gr.Blocks(title="SAP Order Analytics Agent with LLAMA3") as demo:
403
 
404
  # Launch the interface
405
  if __name__ == "__main__":
406
- demo.launch()
 
1
+ # Setup Hugging Face Inference API for LLAMA3
2
  import os
3
  import requests
4
  import json
5
  import gradio as gr
6
  from typing import List, Dict, Any, Optional
7
  import logging
8
+ import time
 
9
 
10
  # Setup logging
11
  logging.basicConfig(level=logging.INFO)
 
13
 
14
  # Configuration - Set these as environment variables in Hugging Face Spaces
15
  SAP_API_KEY = os.getenv('SAP_API_KEY') # Set in Space secrets
16
+ HF_TOKEN = os.getenv('HF_TOKEN') # Set in Space secrets
17
  SAP_BASE_URL = "https://sandbox.api.sap.com/s4hanacloud/sap/opu/odata/sap"
18
 
19
+ # Hugging Face Inference API endpoints
20
+ HF_API_BASE = "https://api-inference.huggingface.co/models"
21
+ LLAMA3_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct" # Using inference API
22
 
23
  class LLAMA3Client:
24
+ def __init__(self, hf_token: str):
25
+ self.hf_token = hf_token
26
+ self.api_url = f"{HF_API_BASE}/{LLAMA3_MODEL}"
27
+ self.headers = {
28
+ "Authorization": f"Bearer {hf_token}",
29
+ "Content-Type": "application/json"
30
+ }
31
+
32
+ # Warm up the model
33
+ self._warm_up_model()
34
+
35
+ def _warm_up_model(self):
36
+ """Warm up the model to avoid cold start delays"""
37
  try:
38
+ logger.info("Warming up LLAMA3 model...")
39
+ self._make_inference_request("Hello", max_new_tokens=10)
40
+ logger.info("Model warmed up successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  except Exception as e:
42
+ logger.warning(f"Model warm-up failed: {e}")
43
+
44
+ def _make_inference_request(self, prompt: str, max_new_tokens: int = 500, temperature: float = 0.1, max_retries: int = 3) -> str:
45
+ """Make inference request to Hugging Face API with retry logic"""
46
+
47
+ payload = {
48
+ "inputs": prompt,
49
+ "parameters": {
50
+ "max_new_tokens": max_new_tokens,
51
+ "temperature": temperature,
52
+ "do_sample": True,
53
+ "top_p": 0.9,
54
+ "return_full_text": False
55
+ }
56
+ }
57
+
58
+ for attempt in range(max_retries):
59
  try:
60
+ response = requests.post(
61
+ self.api_url,
62
+ headers=self.headers,
63
+ json=payload,
64
+ timeout=60
65
  )
66
+
67
+ if response.status_code == 503:
68
+ # Model is loading, wait and retry
69
+ wait_time = min(20 * (attempt + 1), 60)
70
+ logger.info(f"Model loading, waiting {wait_time}s...")
71
+ time.sleep(wait_time)
72
+ continue
73
+
74
+ response.raise_for_status()
75
+ result = response.json()
76
+
77
+ if isinstance(result, list) and len(result) > 0:
78
+ return result[0].get('generated_text', '').strip()
79
+ elif isinstance(result, dict) and 'generated_text' in result:
80
+ return result['generated_text'].strip()
81
+ else:
82
+ logger.error(f"Unexpected response format: {result}")
83
+ return "I received an unexpected response format."
84
+
85
+ except requests.exceptions.RequestException as e:
86
+ logger.error(f"Request failed (attempt {attempt + 1}): {e}")
87
+ if attempt == max_retries - 1:
88
+ return f"Failed to get response after {max_retries} attempts: {str(e)}"
89
+ time.sleep(2 ** attempt) # Exponential backoff
90
+ except Exception as e:
91
+ logger.error(f"Unexpected error: {e}")
92
+ return f"An unexpected error occurred: {str(e)}"
93
+
94
+ return "Failed to generate response"
95
 
96
+ def generate_response(self, prompt: str, max_length: int = 500, temperature: float = 0.1) -> str:
97
+ """Generate response using LLAMA3 via Inference API"""
 
 
98
 
99
+ # Format prompt for LLAMA3 instruction format
100
+ formatted_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
 
101
 
102
+ You are a helpful SAP data analyst. Provide clear, concise answers based on the provided data. Keep responses under 300 words.<|eot_id|><|start_header_id|>user<|end_header_id|>
103
 
104
  {prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
105
 
106
  """
107
+
108
+ try:
109
+ response = self._make_inference_request(
110
+ formatted_prompt,
111
+ max_new_tokens=min(max_length, 400), # Limit tokens to avoid timeouts
112
+ temperature=temperature
 
 
 
 
 
113
  )
114
 
115
+ # Clean up the response
116
+ if response and len(response.strip()) > 0:
117
+ return response
 
 
 
 
118
  else:
119
+ return "I couldn't generate a proper response. Please try rephrasing your question."
120
+
 
 
121
  except Exception as e:
122
  logger.error(f"Error generating response: {e}")
123
  return f"I encountered an error while processing your question: {str(e)}"
 
147
  logger.error(f"JSON decode error: {e}")
148
  return None
149
 
150
+ def fetch_sales_orders(self, top: int = 30) -> List[Dict]:
151
  """Fetch sales orders with error handling"""
152
  url = f"{SAP_BASE_URL}/API_SALES_ORDER_SRV/A_SalesOrder?$top={top}&$inlinecount=allpages"
153
  data = self._make_request(url)
 
173
  logger.error("Failed to fetch sales orders or invalid response format")
174
  return []
175
 
176
+ def fetch_purchase_orders(self, top: int = 30) -> List[Dict]:
177
  """Fetch purchase order headers"""
178
  url = f"{SAP_BASE_URL}/API_PURCHASEORDER_PROCESS_SRV/A_PurchaseOrder?$top={top}&$inlinecount=allpages"
179
  data = self._make_request(url)
 
205
  """Fetch purchase order items for given order numbers"""
206
  all_items = []
207
 
208
+ for po_number in purchase_orders[:5]: # Reduced limit for faster processing
209
  url = f"{SAP_BASE_URL}/API_PURCHASEORDER_PROCESS_SRV/A_PurchaseOrderItem?$filter=PurchaseOrder eq '{po_number}'"
210
  data = self._make_request(url)
211
 
 
291
  # Check if item details are needed
292
  if self.needs_item_details(question) and po_headers:
293
  logger.info("Fetching item-level details")
294
+ po_numbers = [po["PurchaseOrder"] for po in po_headers[:5] if po["PurchaseOrder"]] # Limit for performance
295
  po_items = self.data_fetcher.fetch_purchase_order_items(po_numbers)
296
  context["items"] = po_items
297
  data_type = "Purchase Orders with Item Details"
 
312
 
313
  def generate_response(self, question: str, context: Dict, data_type: str) -> str:
314
  """Generate response using LLAMA3"""
315
+ # Limit context size for API efficiency
316
  context_str = json.dumps(context, indent=2)
317
+ if len(context_str) > 2000: # Smaller limit for API
318
+ context_str = context_str[:2000] + "... (truncated)"
319
 
320
  prompt = f"""Data Type: {data_type}
321
 
 
329
  2. Include specific numbers, dates, or values when relevant
330
  3. If the data doesn't contain enough information to answer fully, mention this
331
  4. Format your response in a user-friendly way
332
+ 5. Keep response under 250 words"""
333
 
334
  try:
335
+ return self.llama_client.generate_response(prompt, max_length=400, temperature=0.1)
336
  except Exception as e:
337
  logger.error(f"Error generating response: {e}")
338
  return f"I encountered an error while processing your question: {str(e)}"
339
 
340
  # Initialize the system
341
  try:
342
+ if not HF_TOKEN:
343
+ logger.error("HF_TOKEN not found in environment variables")
 
 
 
 
 
344
  sap_agent = None
345
+ else:
346
+ llama_client = LLAMA3Client(HF_TOKEN)
347
+ if SAP_API_KEY:
348
+ data_fetcher = SAPDataFetcher(SAP_API_KEY)
349
+ sap_agent = SAPAgent(data_fetcher, llama_client)
350
+ logger.info("SAP Agent initialized successfully")
351
+ else:
352
+ logger.warning("SAP_API_KEY not found. Demo mode enabled.")
353
+ sap_agent = None
354
  except Exception as e:
355
  logger.error(f"Failed to initialize SAP Agent: {e}")
356
  sap_agent = None
 
359
  def chat_with_sap(message, history):
360
  """Handle chat interactions"""
361
  if not sap_agent:
362
+ return history + [("System", "SAP Agent not initialized. Please check your HF_TOKEN and SAP_API_KEY in Space secrets.")]
363
 
364
  if not message.strip():
365
  return history
366
 
367
  try:
368
+ # Add typing indicator
369
  history = history or []
370
+ history.append((message, "πŸ€” Thinking..."))
371
+ yield history
372
+
373
+ # Process the query
374
+ response = sap_agent.process_query(message)
375
+ history[-1] = (message, response)
376
+ yield history
377
+
378
  except Exception as e:
379
  error_msg = f"Error processing your request: {str(e)}"
380
  history = history or []
381
+ if history and history[-1][1] == "πŸ€” Thinking...":
382
+ history[-1] = (message, error_msg)
383
+ else:
384
+ history.append((message, error_msg))
385
+ yield history
386
 
387
  def clear_chat():
388
  return []
 
390
  # Create Gradio interface
391
  with gr.Blocks(title="SAP Order Analytics Agent with LLAMA3") as demo:
392
  gr.Markdown("""
393
+ # 🚀 SAP Order Analytics Agent (Powered by LLAMA3 via Inference API)
394
 
395
+ This AI agent uses Meta's LLAMA3 model via Hugging Face Inference API to analyze SAP data. Ask questions like:
396
  - "How many sales orders do we have?"
397
  - "What's the total value of all purchase orders?"
398
+ - "Show me recent purchase orders"
399
+ - "What are the top suppliers?"
400
 
401
+ **Setup Required:**
402
+ 1. Set `HF_TOKEN` in Space secrets (your Hugging Face token)
403
+ 2. Set `SAP_API_KEY` in Space secrets (your SAP API key)
404
+ 3. Ensure you have access to LLAMA3 model on Hugging Face
405
  """)
406
 
407
  chatbot = gr.Chatbot(
408
  height=500,
409
+ placeholder="Ask me anything about your SAP orders...",
410
+ show_copy_button=True
411
  )
412
 
413
  with gr.Row():
 
430
 
431
  # Launch the interface
432
  if __name__ == "__main__":
433
+ demo.launch(share=True)