Spaces:

0ndr3
/

raffle_assistant

Runtime error

App Files Files Community

0ndr3 committed on May 29, 2025

Commit

e1cc806

verified ·

1 Parent(s): a95a961

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -24

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 # IMPORTS
 import warnings
 warnings.filterwarnings("ignore", message="Failed to load HostKeys")
 warnings.filterwarnings("ignore", message="The 'tuples' format for chatbot messages is deprecated")
@@ -17,7 +18,7 @@ from langchain_core.runnables import RunnableMap, RunnableLambda
 from langchain.memory import ConversationBufferMemory
 from langchain_groq import ChatGroq
-# ─── Secrets & Paths ────────────────────────────────────────────────────────────
 SFTP_HOST       = os.getenv("SFTP_HOST")
 SFTP_USER       = os.getenv("SFTP_USER")
@@ -26,11 +27,11 @@ SFTP_ALERTS_DIR = "/home/birkbeck/alerts"
 GROQ_API_KEY    = os.getenv("GROQ_API_KEY")
 HISTORICAL_JSON = "data/big_prize_data.json"
-# ─── Chat Memory ────────────────────────────────────────────────────────────────
 memory = ConversationBufferMemory(memory_key="chat_history", input_key="question")
-# ─── 1) Build Historical Chroma DB ───────────────────────────────────────────────
 def build_chroma_db():
     with open(HISTORICAL_JSON) as f:
@@ -60,7 +61,7 @@ def build_chroma_db():
     emb = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
     return Chroma.from_documents(docs, emb)
-# ─── 2) Load Live Alerts via SFTP ───────────────────────────────────────────────
 def load_live_alerts():
     cnopts = pysftp.CnOpts()
@@ -111,22 +112,23 @@ def load_live_alerts():
             alerts.append(Document(page_content=content, metadata=md))
     return alerts
-# ─── 3) Retriever ───────────────────────────────────────────────────────────────
-db        = build_chroma_db()
 live_docs = load_live_alerts()
 def combined_docs(q: str):
     hist = db.similarity_search(q, k=8)
     return hist + live_docs
-# ─── 4) Prompt + Filter Chain ───────────────────────────────────────────────────
 prompt = PromptTemplate(
     input_variables=["chat_history","context","question"],
     template="""
 You are **Rafael The Raffler**, a calm friendly expert in instant-win raffle analysis.
-If asked “what do you do?”, give a bullet list of your strengths (raffle timing, value insights, patterns).
 Reasoning Rules:
 1. **Interpreting “When”:** Whenever the user asks “When…?”, interpret that as “At what tickets-sold count and percent did the prize win occur?”  Do *not* give calendar dates or times.
 --- Conversation So Far ---
@@ -140,23 +142,21 @@ Reasoning Rules:
 def filter_docs(inputs):
     docs, q = inputs["documents"], inputs["question"].lower()
-    # live‐prize query?
     if ("live" in q or "latest" in q or "recent" in q) and any(w in q for w in ("prize","raffle","won")):
         live = [d for d in docs if d.metadata["source"]=="recent and live"]
         if live:
             recent = max(live, key=lambda d: parser.isoparse(d.metadata["timestamp"]))
             return {"documents":[recent], "question":q}
-    # threshold filter
     m = re.search(r"(?:above|over|greater than)\s*£?([\d,]+)", q)
     if m:
         thr = float(m.group(1).replace(",",""))
         docs = [d for d in docs if d.metadata["value"] > thr]
     return {"documents":docs, "question":q}
-# ─── Follow-up Question Rewriting ───────────────────────────────────────────────
-# This template will turn "How many big prizes...?" into
-# "In raffle 86, how many big prizes in total were won?"
 question_rewrite_template = PromptTemplate(
     input_variables=["chat_history","question"],
     template="""
@@ -172,30 +172,27 @@ Rewritten standalone question:"""
 )
 rewrite_chain = (
-    # bundle history + raw question
     RunnableLambda(lambda q: {
         "chat_history": memory.load_memory_variables({})["chat_history"],
         "question": q
     })
-    # build the rewrite prompt
     | RunnableLambda(lambda inp: question_rewrite_template.format(**inp))
-    # call the LLM to rewrite
     | ChatGroq(api_key=GROQ_API_KEY, model="llama3-8b-8192")
     | StrOutputParser()
 )
-# ─── 5) RAG + ChatGroq Chain (with rewrite) ────────────────────────────────────
 retrieval_chain = (
-    # 1) Rewrite the question first
     rewrite_chain
-    # 2) Retrieve docs against the rewritten question
     | RunnableMap({
         "documents": lambda rewritten_q: combined_docs(rewritten_q),
         "question":  lambda rewritten_q: rewritten_q
     })
     | RunnableLambda(filter_docs)
-    # 3) Build final inputs and truncate history
     | RunnableLambda(lambda d: {
         "chat_history": "\n".join(
             memory.load_memory_variables({})["chat_history"].splitlines()[-4:]
@@ -203,13 +200,13 @@ retrieval_chain = (
         "context": "\n".join(doc.page_content for doc in d["documents"]),
         "question": d["question"]
     })
-    # 4) Format final prompt and call LLM
     | RunnableLambda(lambda inp: prompt.format(**inp))
     | ChatGroq(api_key=GROQ_API_KEY, model="llama3-8b-8192")
     | StrOutputParser()
 )
-# ─── 6) Gradio Interface ───────────────────────────────────────────────────────
 WELCOME = """
 👋 **Welcome to Rafael The Raffler**
@@ -217,10 +214,22 @@ Your raffle-analysis assistant with RAG.
 Ask about raffle wins, ticket timing, prize values or the latest live raffle.
 """
 def gradio_chat(question: str) -> str:
-    # run RAG chain
     answer = retrieval_chain.invoke(question)
-    # store in memory for multi‐turn
     memory.save_context({"question": question}, {"answer": answer})
     return answer

 # IMPORTS
 import warnings
 warnings.filterwarnings("ignore", message="Failed to load HostKeys")
 warnings.filterwarnings("ignore", message="The 'tuples' format for chatbot messages is deprecated")
 from langchain.memory import ConversationBufferMemory
 from langchain_groq import ChatGroq
+# SECRETS & PATHS
 SFTP_HOST       = os.getenv("SFTP_HOST")
 SFTP_USER       = os.getenv("SFTP_USER")
 GROQ_API_KEY    = os.getenv("GROQ_API_KEY")
 HISTORICAL_JSON = "data/big_prize_data.json"
+# CHAT MEMORY
 memory = ConversationBufferMemory(memory_key="chat_history", input_key="question")
+# BUILD HISTORICAL CHROMA DB
 def build_chroma_db():
     with open(HISTORICAL_JSON) as f:
     emb = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
     return Chroma.from_documents(docs, emb)
+# LOAD RECENT/LIVE ALERTS VIA SFTP
 def load_live_alerts():
     cnopts = pysftp.CnOpts()
             alerts.append(Document(page_content=content, metadata=md))
     return alerts
+# RETRIEVER
+db = build_chroma_db()
 live_docs = load_live_alerts()
 def combined_docs(q: str):
     hist = db.similarity_search(q, k=8)
     return hist + live_docs
+# PROMPT + FILTER CHAIN
 prompt = PromptTemplate(
     input_variables=["chat_history","context","question"],
     template="""
 You are **Rafael The Raffler**, a calm friendly expert in instant-win raffle analysis.
+**Only** describe your strengths (raffle timing, value insights, patterns) when the user explicitly asks “what do you do?” or "what you good at?".
+If they merely greet you or ask anything else, do **not** list your strengths—just answer the question.
 Reasoning Rules:
 1. **Interpreting “When”:** Whenever the user asks “When…?”, interpret that as “At what tickets-sold count and percent did the prize win occur?”  Do *not* give calendar dates or times.
 --- Conversation So Far ---
 def filter_docs(inputs):
     docs, q = inputs["documents"], inputs["question"].lower()
+    # RECENT/LIVE
     if ("live" in q or "latest" in q or "recent" in q) and any(w in q for w in ("prize","raffle","won")):
         live = [d for d in docs if d.metadata["source"]=="recent and live"]
         if live:
             recent = max(live, key=lambda d: parser.isoparse(d.metadata["timestamp"]))
             return {"documents":[recent], "question":q}
+    # THRESHOLD
     m = re.search(r"(?:above|over|greater than)\s*£?([\d,]+)", q)
     if m:
         thr = float(m.group(1).replace(",",""))
         docs = [d for d in docs if d.metadata["value"] > thr]
     return {"documents":docs, "question":q}
+# FOLLOW-UP QUESTION REWRITING
 question_rewrite_template = PromptTemplate(
     input_variables=["chat_history","question"],
     template="""
 )
 rewrite_chain = (
     RunnableLambda(lambda q: {
         "chat_history": memory.load_memory_variables({})["chat_history"],
         "question": q
     })
     | RunnableLambda(lambda inp: question_rewrite_template.format(**inp))
     | ChatGroq(api_key=GROQ_API_KEY, model="llama3-8b-8192")
     | StrOutputParser()
 )
+# RAG + CHATGROQ CHAIN (WITH REWRITE) ────────────────────────────────────
 retrieval_chain = (
+    # 1. REWRITE QUESTION FIRST
     rewrite_chain
+    # 2. RETRIEVE DOCS AGAINST REWRITTEN QUESTION
     | RunnableMap({
         "documents": lambda rewritten_q: combined_docs(rewritten_q),
         "question":  lambda rewritten_q: rewritten_q
     })
     | RunnableLambda(filter_docs)
+    # 3. BUILD FINAL INPUTS AND TRUNCATE HISTORY
     | RunnableLambda(lambda d: {
         "chat_history": "\n".join(
             memory.load_memory_variables({})["chat_history"].splitlines()[-4:]
         "context": "\n".join(doc.page_content for doc in d["documents"]),
         "question": d["question"]
     })
+    # 4. FORMAT FINAL PROMPT AND CALL LLM
     | RunnableLambda(lambda inp: prompt.format(**inp))
     | ChatGroq(api_key=GROQ_API_KEY, model="llama3-8b-8192")
     | StrOutputParser()
 )
+# GRADIO
 WELCOME = """
 👋 **Welcome to Rafael The Raffler**
 Ask about raffle wins, ticket timing, prize values or the latest live raffle.
 """
+# GREETING HANDLING
+def handle_greeting(question: str):
+    if re.match(r'^(hi|hello|hey)[.!?]*$', question.strip(), re.I):
+        return "Hello! How can I help you with your raffle analysis today?"
 def gradio_chat(question: str) -> str:
+    # 1. GREETING ONLY?
+    greet = handle_greeting(question)
+    if greet:
+        # SAVE GREETING
+        memory.save_context({"question": question}, {"answer": greet})
+        return greet
+    # 2. OTHERWISE > RAG CHAIN
     answer = retrieval_chain.invoke(question)
     memory.save_context({"question": question}, {"answer": answer})
     return answer