Update app.py
app.py
@@ -9,65 +9,49 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
+from fastapi import FastAPI, Header, HTTPException
+import threading
+import uvicorn
+
+# ====================== CONFIGURATION ======================
+API_KEY = "Samson"  # Your hardcoded API key
+MODEL_NAME = "microsoft/phi-2"  # Using Phi-2 model
+# ===========================================================
 
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
-# ------------------------------------------------
-# 1. Load and prepare the dataset
-# ------------------------------------------------------------------
+# ---------------------- RAG Setup --------------------------
+# 1. Load and prepare dataset
 ds = load_dataset("maxpro291/bankfaqs_dataset")
-data = ds['train']
-
-questions = []
-answers = []
-
-for entry in data['text']:
-    if entry.startswith("Q:"):
-        questions.append(entry)
-    elif entry.startswith("A:"):
-        answers.append(entry)
-
-Bank_Data = pd.DataFrame({'question': questions, 'answer': answers})
-
-context_data = []
-for i in range(len(Bank_Data)):
-    context = f"Question: {Bank_Data.iloc[i]['question']} Answer: {Bank_Data.iloc[i]['answer']}"
-    context_data.append(context)
+data = ds['train'][:]
+Bank_Data = pd.DataFrame({
+    'question': [entry for entry in data['text'] if entry.startswith("Q:")],
+    'answer': [entry for entry in data['text'] if entry.startswith("A:")]
+})
 
-# ------------------------------------------------------------------
-# 2. Create the Vector Store for Retrieval (UNCHANGED)
-# ------------------------------------------------------------------
+# 2. Create vector store
 embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-
 vectorstore = Chroma.from_texts(
-    texts=context_data,
+    texts=[f"Q: {q}\nA: {a}" for q, a in zip(Bank_Data['question'], Bank_Data['answer'])],
     embedding=embed_model,
     persist_directory="./chroma_db_bank"
 )
 retriever = vectorstore.as_retriever()
 
-# ------------------------------------------------------------------
-# 3. Initialize the LLM
-# ------------------------------------------------------------------
-model_name = "microsoft/phi-2"
-
-# Configure 4-bit quantization for efficient loading
-quantization_config = BitsAndBytesConfig(
+# 3. Initialize LLM with 4-bit quantization
+quant_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_compute_dtype="float16",
     bnb_4bit_quant_type="nf4"
 )
-
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
-    model_name,
+    MODEL_NAME,
     device_map="auto",
     trust_remote_code=True,
-    quantization_config=quantization_config
+    quantization_config=quant_config
 )
-
-# Create text-generation pipeline with Phi-2 specific settings
 pipe = pipeline(
     "text-generation",
     model=model,
@@ -75,25 +59,16 @@ pipe = pipeline(
     max_new_tokens=512,
     temperature=0.7,
     top_p=0.95,
-    repetition_penalty=1.15,
-    do_sample=True
+    repetition_penalty=1.15
 )
-
-# Wrap the pipeline in LangChain's HuggingFacePipeline
 huggingface_model = HuggingFacePipeline(pipeline=pipe)
 
-# ------------------------------------------------------------------
-# 4. Define the prompt and build the RAG chain
-# ------------------------------------------------------------------
-template = (
-    "Context: {context}\n"
-    "Use the provided context if it is relevant to answer the question. "
-    "If not, answer using your general banking knowledge.\n"
-    "Question: {question}\n"
-    "Answer:"
-)
+# 4. Build RAG chain
+template = """You are a banking assistant. Use context if relevant:
+Context: {context}
+Question: {question}
+Answer:"""
 rag_prompt = PromptTemplate.from_template(template)
-
 rag_chain = (
     {"context": retriever, "question": RunnablePassthrough()}
     | rag_prompt
@@ -101,38 +76,56 @@ rag_chain = (
     | StrOutputParser()
 )
 
-# ------------------------------------------------
-# 5. Set up the Gradio chat interface
-# ------------------------------------------------------------------
+# ---------------------- API Setup --------------------------
+app = FastAPI()
+
+def validate_api_key(api_key: str = Header(None)):
+    if api_key != API_KEY:
+        raise HTTPException(status_code=401, detail="Invalid API Key")
+    return True
+
+@app.post("/chat")
+async def chat_endpoint(
+    question: str,
+    authorization: str = Header(None),
+):
+    validate_api_key(authorization)
+    response = ""
+    for chunk in rag_chain.stream(question):
+        response += chunk
+    return {"response": response}
+
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy"}
+
+# -------------------- Gradio Interface ---------------------
 def rag_memory_stream(message, history):
     partial_text = ""
     for new_text in rag_chain.stream(message):
         partial_text += new_text
         yield partial_text
 
-examples = [
-    "I want to open an account",
-    "What is a savings account?",
-    "How do I use an ATM?",
-    "How can I resolve a bank account issue?"
-]
-
-title = "Your Personal Banking Assistant 💬"
-description = (
-    "Welcome! I'm here to answer your questions about banking and related topics. "
-    "Ask me anything, and I'll do my best to assist you."
-)
-
 demo = gr.ChatInterface(
     fn=rag_memory_stream,
-    title=title,
-    description=description,
-    examples=examples,
-
+    title="Banking Assistant 🔒 (API Key: Samson)",
+    description="Welcome! Use API key 'Samson' to access the /chat endpoint",
+    examples=[
+        "How do I open an account?",
+        "What's the interest rate for savings?",
+        "How do I apply for a loan?"
+    ],
+    theme="glass"
 )
 
-# -------------------------------------------
-# 6. Launch the app
-# ------------------------------------------------------------------
+# --------------------- Launch Servers ----------------------
+def run_gradio():
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+
 if __name__ == "__main__":
-    demo.launch()
+    # Start Gradio in separate thread
+    gradio_thread = threading.Thread(target=run_gradio)
+    gradio_thread.start()
+
+    # Start FastAPI
+    uvicorn.run(app, host="0.0.0.0", port=8000)
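
For quick local testing of the new endpoints, here is a minimal client sketch. It is illustrative only, not part of this commit: it assumes the app was started with `python app.py` and that the FastAPI server is reachable at `http://localhost:8000`.

```python
# Hypothetical client for the endpoints added in this commit.
# Assumes the server is running locally on port 8000.
import requests

BASE_URL = "http://localhost:8000"  # assumed local deployment

# /health requires no key
print(requests.get(f"{BASE_URL}/health").json())  # {"status": "healthy"}

# /chat: `question` is declared as a bare `str` on a POST route, so FastAPI
# reads it from the query string; chat_endpoint passes the `Authorization`
# header value to validate_api_key, which compares it against API_KEY.
resp = requests.post(
    f"{BASE_URL}/chat",
    params={"question": "How do I open an account?"},
    headers={"Authorization": "Samson"},
)
resp.raise_for_status()
print(resp.json()["response"])
```

Note that /chat collects the chain's stream server-side and returns the full answer in a single JSON payload; only the Gradio interface streams tokens incrementally.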