NSamson1 committed on
Commit
132540f
·
verified ·
1 Parent(s): cc4abf7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -137
app.py CHANGED
@@ -1,173 +1,142 @@
1
  import os
2
  import pandas as pd
3
  import logging
4
- import threading
5
- from fastapi import FastAPI, Header, HTTPException
6
- import uvicorn
7
- import gradio as gr
8
- from langchain_community.embeddings import HuggingFaceEmbeddings
9
- from langchain_community.vectorstores import Chroma
10
  from langchain_core.prompts import PromptTemplate
11
  from langchain_core.output_parsers import StrOutputParser
12
  from langchain_core.runnables import RunnablePassthrough
13
- from datasets import load_dataset
14
- from transformers import (
15
- AutoTokenizer,
16
- AutoModelForCausalLM,
17
- pipeline,
18
- BitsAndBytesConfig
19
- )
20
- import torch # Explicitly imported for CUDA management
21
-
22
- # ====================== CONFIGURATION ======================
23
- API_KEY = "Samson"
24
- MODEL_NAME = "microsoft/phi-2"
25
- EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
26
- # ===========================================================
27
-
28
- # Configure logging
29
- logging.basicConfig(
30
- level=logging.INFO,
31
- format='%(asctime)s - %(levelname)s - %(message)s'
32
- )
33
 
34
- # Clear CUDA cache if using GPU
35
- if torch.cuda.is_available():
36
- torch.cuda.empty_cache()
37
 
38
  # ------------------------------------------------------------------
39
- # 1. Load and Prepare Dataset
40
  # ------------------------------------------------------------------
41
- def load_data():
42
- try:
43
- ds = load_dataset("maxpro291/bankfaqs_dataset")
44
- data = ds['train'][:]
45
- questions = [entry for entry in data['text'] if entry.startswith("Q:")]
46
- answers = [entry for entry in data['text'] if entry.startswith("A:")]
47
- return pd.DataFrame({'question': questions, 'answer': answers})
48
- except Exception as e:
49
- logging.error(f"Error loading dataset: {str(e)}")
50
- raise
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  # ------------------------------------------------------------------
53
- # 2. Initialize Embeddings and Vector Store
54
  # ------------------------------------------------------------------
55
- def init_vectordb(data):
56
- try:
57
- embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
58
- texts = [f"Q: {q}\nA: {a}" for q, a in zip(data['question'], data['answer'])]
59
- return Chroma.from_texts(
60
- texts=texts,
61
- embedding=embeddings,
62
- persist_directory="./chroma_db_bank"
63
- )
64
- except Exception as e:
65
- logging.error(f"Error initializing vector store: {str(e)}")
66
- raise
67
 
68
  # ------------------------------------------------------------------
69
- # 3. Initialize LLM with Quantization
70
  # ------------------------------------------------------------------
71
- def load_llm():
72
- try:
73
- quantization_config = BitsAndBytesConfig(
74
- load_in_4bit=True,
75
- bnb_4bit_use_double_quant=True,
76
- bnb_4bit_quant_type="nf4",
77
- bnb_4bit_compute_dtype="float16"
78
- )
79
-
80
- tokenizer = AutoTokenizer.from_pretrained(
81
- MODEL_NAME,
82
- trust_remote_code=True,
83
- padding_side="left" # Critical for phi-2
84
- )
85
-
86
- model = AutoModelForCausalLM.from_pretrained(
87
- MODEL_NAME,
88
- device_map="auto",
89
- trust_remote_code=True,
90
- quantization_config=quantization_config
91
- )
92
-
93
- return pipeline(
94
- "text-generation",
95
- model=model,
96
- tokenizer=tokenizer,
97
- max_new_tokens=512,
98
- temperature=0.7,
99
- top_p=0.95,
100
- do_sample=True
101
- )
102
- except Exception as e:
103
- logging.error(f"Error loading LLM: {str(e)}")
104
- raise
105
-
106
- # Initialize components
107
- bank_data = load_data()
108
- retriever = init_vectordb(bank_data).as_retriever()
109
- llm_pipeline = load_llm()
110
 
111
  # ------------------------------------------------------------------
112
- # 4. Build RAG Chain
113
  # ------------------------------------------------------------------
114
- template = """You are a banking assistant. Use context if relevant:
115
- Context: {context}
116
- Question: {question}
117
- Answer:"""
118
- prompt = PromptTemplate.from_template(template)
 
 
 
 
119
 
 
120
  rag_chain = (
121
  {"context": retriever, "question": RunnablePassthrough()}
122
- | prompt
123
- | llm_pipeline
124
  | StrOutputParser()
125
  )
126
 
127
  # ------------------------------------------------------------------
128
- # 5. FastAPI Setup
129
  # ------------------------------------------------------------------
130
- app = FastAPI()
131
-
132
- def validate_api_key(api_key: str = Header(None)):
133
- if api_key != API_KEY:
134
- raise HTTPException(status_code=401, detail="Invalid API Key")
135
- return True
136
 
137
- @app.post("/chat")
138
- async def chat_endpoint(question: str, authorization: str = Header(None)):
139
- validate_api_key(authorization)
140
- response = ""
141
- for chunk in rag_chain.stream(question):
142
- response += chunk
143
- return {"response": response}
144
 
145
- # ------------------------------------------------------------------
146
- # 6. Gradio Interface
147
- # ------------------------------------------------------------------
148
- def respond(message, history):
149
- return next(rag_chain.stream(message))
150
 
 
151
  demo = gr.ChatInterface(
152
- fn=respond,
153
- title="Banking Assistant 🔒",
154
- examples=[
155
- "How do I open an account?",
156
- "What's the interest rate?",
157
- "How do I apply for a loan?"
158
- ],
159
- theme="glass"
160
  )
161
 
162
  # ------------------------------------------------------------------
163
- # 7. Launch Servers
164
  # ------------------------------------------------------------------
165
  if __name__ == "__main__":
166
- # Start Gradio in separate thread
167
- threading.Thread(
168
- target=demo.launch,
169
- kwargs={"server_name": "0.0.0.0", "server_port": 7860, "share": False}
170
- ).start()
171
-
172
- # Start FastAPI
173
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
1
  import os
2
  import pandas as pd
3
  import logging
4
+ from datasets import load_dataset
5
+ from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
6
+ from langchain_chroma import Chroma
 
 
 
7
  from langchain_core.prompts import PromptTemplate
8
  from langchain_core.output_parsers import StrOutputParser
9
  from langchain_core.runnables import RunnablePassthrough
10
+ import gradio as gr
11
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
# Configure root logging once at startup: timestamped INFO-level messages.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
 
15
 
16
# ------------------------------------------------------------------
# 1. Load and Prepare the Bank FAQ Dataset
# ------------------------------------------------------------------
# Pull the Bank FAQs dataset from the Hugging Face Hub.
ds = load_dataset("maxpro291/bankfaqs_dataset")
train_ds = ds['train']
data = train_ds[:]  # materialize every example

# Split the 'text' field into question and answer lines.
# NOTE(review): assumes "Q:"/"A:" lines appear in matching order so the
# positional pairing below is correct — confirm against the dataset.
questions = [entry for entry in data['text'] if entry.startswith("Q:")]
answers = [entry for entry in data['text'] if entry.startswith("A:")]

# Pair questions with answers positionally in a DataFrame.
Bank_Data = pd.DataFrame({'question': questions, 'answer': answers})

# Fold each Q/A pair into one context string for the vector store.
context_data = [
    f"Question: {q} Answer: {a}"
    for q, a in zip(Bank_Data['question'], Bank_Data['answer'])
]
41
 
42
# ------------------------------------------------------------------
# 2. Create the Vector Store for Retrieval
# ------------------------------------------------------------------
# Embed every Q/A context string with a small sentence-transformer model.
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Build a Chroma index over the context strings, persisted on disk so the
# embeddings survive restarts (directory is created if absent).
vectorstore = Chroma.from_texts(
    texts=context_data,
    embedding=embed_model,
    persist_directory="./chroma_db_bank"
)

# Retriever used as the "context" source in the RAG chain below.
# NOTE(review): default search settings (similarity, k=4 in most langchain
# versions) — confirm the desired k for this FAQ corpus.
retriever = vectorstore.as_retriever()
57
 
58
# ------------------------------------------------------------------
# 3. Initialize the LLM for Generation
# ------------------------------------------------------------------
# Note:
# The model "meta-llama/Llama-2-7b-chat-hf" is gated. If you have access,
# authenticate using huggingface-cli login. Otherwise, keep a public model.
model_name = "gpt2"  # Replace with "meta-llama/Llama-2-7b-chat-hf" if you are authenticated.

# Load tokenizer and model weights from the Hub.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create a text-generation pipeline.
# Fixes vs. the previous revision of this block:
#  - do_sample=True: transformers defaults to greedy decoding, which
#    silently ignores temperature/top_p; sampling must be enabled for
#    those parameters to have any effect.
#  - max_new_tokens instead of max_length: max_length counts the prompt
#    tokens too, so a long retrieval-augmented prompt would leave little
#    or no room for the answer; max_new_tokens bounds only the generated
#    continuation.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15
)

# Wrap the pipeline in LangChain's HuggingFacePipeline so it can be piped
# into the RAG chain as a runnable LLM.
huggingface_model = HuggingFacePipeline(pipeline=pipe)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
# ------------------------------------------------------------------
# 4. Build the Retrieval-Augmented Generation (RAG) Chain
# ------------------------------------------------------------------
# Prompt template. BUG FIX: the previous template told the model to "use
# the provided context" but never interpolated a {context} placeholder,
# so the documents retrieved by the chain below were silently discarded.
# The template now actually receives the retrieved context.
template = (
    "You are a helpful banking assistant. "
    "Use the provided context if it is relevant to answer the question. "
    "If not, answer using your general banking knowledge.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Answer:"
)
rag_prompt = PromptTemplate.from_template(template)

# Build the RAG chain: the retriever fills {context}, the raw user input
# passes straight through to {question}, then prompt -> LLM -> plain text.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | huggingface_model
    | StrOutputParser()
)
104
 
105
# ------------------------------------------------------------------
# 5. Set Up the Gradio Chat Interface
# ------------------------------------------------------------------
def rag_memory_stream(message, history):
    """Stream the chain's answer, yielding the growing partial text.

    Gradio re-renders the chat bubble on every yield, producing a
    typewriter effect. ``history`` is accepted because ChatInterface
    passes it, but it is unused: each message is answered statelessly.
    """
    answer_so_far = ""
    for chunk in rag_chain.stream(message):
        answer_so_far += chunk
        yield answer_so_far
114
 
115
# Example questions offered as clickable starters in the chat UI.
examples = [
    "I want to open an account",
    "What is a savings account?",
    "How do I use an ATM?",
    "How can I resolve a bank account issue?"
]

# UI header text shown above the chat widget.
title = "Your Personal Banking Assistant 💬"
description = (
    "Welcome! I’m here to answer your questions about banking and related topics. "
    "Ask me anything, and I’ll do my best to assist you."
)

# Streaming chat interface backed by rag_memory_stream (a generator, so
# replies render incrementally as the model produces text).
demo = gr.ChatInterface(
    fn=rag_memory_stream,
    title=title,
    description=description,
    examples=examples,
    theme="glass",
)
137
 
138
# ------------------------------------------------------------------
# 6. Launch the App
# ------------------------------------------------------------------
if __name__ == "__main__":
    # share=True requests a public *.gradio.live tunnel.
    # NOTE(review): on Hugging Face Spaces share links are unavailable and
    # this flag is ignored with a warning — confirm the target environment.
    demo.launch(share=True)