Update app.py
app.py
CHANGED
```diff
@@ -4,6 +4,8 @@ import gradio as gr
 
 
 from langfuse import Langfuse
+from langfuse.decorators import observe
+
 
 langfuse = Langfuse(
     secret_key="sk-lf-229e10c5-6210-4a4b-a432-0f17bc66e56c",
@@ -11,6 +13,8 @@ langfuse = Langfuse(
     host="https://chris4k-langfuse-template-space.hf.space"
 )
 
+
+
 # Load Llama 3.2 model
 model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Replace with the exact model path
 tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -117,6 +121,7 @@ def construct_prompt(user_input, context, chat_history, max_history_turns=1): #
     print(prompt)
     return prompt
 
+@observe()
 def chat_with_model(user_input, chat_history=[]):
     # Search for relevant products
     search_results = search_products(user_input)
@@ -136,9 +141,26 @@ def chat_with_model(user_input, chat_history=[]):
     else:
         context = "Das weiß ich nicht."
     print("context: ------------------------------------- \n"+context)
+
+    langfuse.observe(
+        name="search_products",
+        input={"query": user_input},
+        output={"context": context},
+        metadata={"search_results_found": len(search_results) if search_results else 0}
+    )
+
+
     # Pass both user_input and context to construct_prompt
     prompt = construct_prompt(user_input, context, chat_history) # This line is changed
     print("prompt: ------------------------------------- \n"+prompt)
+
+    # LangFuse observation: Log prompt construction
+    langfuse.observe(
+        name="construct_prompt",
+        input={"user_input": user_input, "context": context, "chat_history": chat_history},
+        output={"prompt": prompt}
+    )
+
     input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=4096).to("cpu")
     tokenizer.pad_token = tokenizer.eos_token
     attention_mask = torch.ones_like(input_ids).to("cpu")
@@ -146,7 +168,16 @@ def chat_with_model(user_input, chat_history=[]):
                              max_new_tokens=1200, do_sample=True,
                              top_k=50, temperature=0.7)
     response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
+
+
     print("respone: ------------------------------------- \n"+response)
+    # LangFuse observation: Log LLM response
+    langfuse.observe(
+        name="llm_response",
+        input={"prompt": prompt},
+        output={"response": response},
+        metadata={"response_length": len(response)}
+    )
     chat_history.append((context, response)) # or chat_history.append((user_input, response)) if you want to store user input
     return response, chat_history
 
```
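The commit wires up Langfuse tracing in two ways: an `@observe()` decorator on `chat_with_model`, plus manual `langfuse.observe(...)` calls inside it. The decorator alone already creates a trace per call and records the function's arguments and return value. Below is a minimal sketch of decorator-based tracing, assuming the v2 Python SDK's `langfuse.decorators` module; `langfuse_context` is an assumption here and is not part of this commit.

```python
# Sketch under v2-SDK assumptions, not the committed code.
from langfuse.decorators import observe, langfuse_context

@observe()  # one trace per call; args and return value are recorded
def chat_with_model(user_input, chat_history=None):
    chat_history = chat_history or []  # avoids the mutable-default pitfall
    # ... search, prompt construction, generation ...
    response = "..."
    # Metadata can be attached to the active trace from inside the function,
    # instead of calling the client directly.
    langfuse_context.update_current_trace(metadata={"turns": len(chat_history)})
    chat_history.append((user_input, response))
    return response, chat_history
```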
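One caveat: in the v2 Python SDK, `observe` is a decorator, and the documented `Langfuse` client API exposes `trace()`, `span()`, `event()`, and `generation()` rather than an `observe()` method, so the three `langfuse.observe(...)` calls added above would likely raise `AttributeError` at runtime. A hedged sketch of the same three observations using those low-level client methods, assuming the variables from `chat_with_model` are in scope:

```python
# Sketch, not the committed code: the same observations grouped under one
# trace via the documented v2 client API. Assumes user_input, context,
# search_results, prompt, response, and model_name are in scope.
trace = langfuse.trace(name="chat_with_model", input={"user_input": user_input})

trace.event(
    name="search_products",
    input={"query": user_input},
    output={"context": context},
    metadata={"search_results_found": len(search_results) if search_results else 0},
)

trace.event(
    name="construct_prompt",
    input={"user_input": user_input, "context": context},
    output={"prompt": prompt},
)

trace.generation(
    name="llm_response",
    model=model_name,
    input=prompt,
    output=response,
    metadata={"response_length": len(response)},
)

langfuse.flush()  # events are queued asynchronously; flush before exit
```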