NSamson1 committed on
Commit 6f344a4 · verified · 1 Parent(s): 0f5b97b

Update app.py

Files changed (1)
  1. app.py +97 -47
app.py CHANGED
@@ -10,7 +10,6 @@ from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
- # Transformers and datasets
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
@@ -18,57 +17,100 @@ from transformers import (
    pipeline,
    BitsAndBytesConfig
)
+ import torch  # Explicitly imported for CUDA management

# ====================== CONFIGURATION ======================
API_KEY = "Samson"
- MODEL_NAME = "microsoft/phi-2"
-
- # Set up logging
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
- # ---------------------- RAG Setup --------------------------
- # 1. Load dataset
- ds = load_dataset("maxpro291/bankfaqs_dataset")
- data = ds['train'][:]
- Bank_Data = pd.DataFrame({
-     'question': [entry for entry in data['text'] if entry.startswith("Q:")],
-     'answer': [entry for entry in data['text'] if entry.startswith("A:")]
- })
-
- # 2. Create vector store
- embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
- vectorstore = Chroma.from_texts(
-     texts=[f"Q: {q}\nA: {a}" for q, a in zip(Bank_Data['question'], Bank_Data['answer'])],
-     embedding=embed_model,
-     persist_directory="./chroma_db_bank"
- )
- retriever = vectorstore.as_retriever()
-
- # 3. Initialize LLM
- quant_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_compute_dtype="float16",
-     bnb_4bit_quant_type="nf4"
- )
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_NAME,
-     device_map="auto",
-     trust_remote_code=True,
-     quantization_config=quant_config
- )
-
- # Create LangChain pipeline
- llm_pipeline = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
-     max_new_tokens=512,
-     temperature=0.7,
-     top_p=0.95
- )
-
- # 4. Build RAG chain
+ MODEL_NAME = "microsoft/phi-2"
+ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+ # ===========================================================
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(levelname)s - %(message)s'
+ )
+
+ # Clear CUDA cache if using GPU
+ if torch.cuda.is_available():
+     torch.cuda.empty_cache()
+
+ # ------------------------------------------------------------------
+ # 1. Load and Prepare Dataset
+ # ------------------------------------------------------------------
+ def load_data():
+     try:
+         ds = load_dataset("maxpro291/bankfaqs_dataset")
+         data = ds['train'][:]
+         questions = [entry for entry in data['text'] if entry.startswith("Q:")]
+         answers = [entry for entry in data['text'] if entry.startswith("A:")]
+         return pd.DataFrame({'question': questions, 'answer': answers})
+     except Exception as e:
+         logging.error(f"Error loading dataset: {str(e)}")
+         raise
+
+ # ------------------------------------------------------------------
+ # 2. Initialize Embeddings and Vector Store
+ # ------------------------------------------------------------------
+ def init_vectordb(data):
+     try:
+         embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+         texts = [f"Q: {q}\nA: {a}" for q, a in zip(data['question'], data['answer'])]
+         return Chroma.from_texts(
+             texts=texts,
+             embedding=embeddings,
+             persist_directory="./chroma_db_bank"
+         )
+     except Exception as e:
+         logging.error(f"Error initializing vector store: {str(e)}")
+         raise
+
+ # ------------------------------------------------------------------
+ # 3. Initialize LLM with Quantization
+ # ------------------------------------------------------------------
+ def load_llm():
+     try:
+         quantization_config = BitsAndBytesConfig(
+             load_in_4bit=True,
+             bnb_4bit_use_double_quant=True,
+             bnb_4bit_quant_type="nf4",
+             bnb_4bit_compute_dtype="float16"
+         )
+
+         tokenizer = AutoTokenizer.from_pretrained(
+             MODEL_NAME,
+             trust_remote_code=True,
+             padding_side="left"  # Critical for phi-2
+         )
+
+         model = AutoModelForCausalLM.from_pretrained(
+             MODEL_NAME,
+             device_map="auto",
+             trust_remote_code=True,
+             quantization_config=quantization_config
+         )
+
+         return pipeline(
+             "text-generation",
+             model=model,
+             tokenizer=tokenizer,
+             max_new_tokens=512,
+             temperature=0.7,
+             top_p=0.95,
+             do_sample=True
+         )
+     except Exception as e:
+         logging.error(f"Error loading LLM: {str(e)}")
+         raise
+
+ # Initialize components
+ bank_data = load_data()
+ retriever = init_vectordb(bank_data).as_retriever()
+ llm_pipeline = load_llm()
+
+ # ------------------------------------------------------------------
+ # 4. Build RAG Chain
+ # ------------------------------------------------------------------
template = """You are a banking assistant. Use context if relevant:
Context: {context}
Question: {question}
@@ -82,7 +124,9 @@ rag_chain = (
    | StrOutputParser()
)

- # ---------------------- API Setup --------------------------
+ # ------------------------------------------------------------------
+ # 5. FastAPI Setup
+ # ------------------------------------------------------------------
app = FastAPI()

def validate_api_key(api_key: str = Header(None)):
@@ -98,7 +142,9 @@ async def chat_endpoint(question: str, authorization: str = Header(None)):
        response += chunk
    return {"response": response}

- # -------------------- Gradio Interface ---------------------
+ # ------------------------------------------------------------------
+ # 6. Gradio Interface
+ # ------------------------------------------------------------------
def respond(message, history):
    return next(rag_chain.stream(message))

@@ -113,11 +159,15 @@ demo = gr.ChatInterface(
    theme="glass"
)

- # --------------------- Launch Servers ----------------------
+ # ------------------------------------------------------------------
+ # 7. Launch Servers
+ # ------------------------------------------------------------------
if __name__ == "__main__":
+     # Start Gradio in separate thread
    threading.Thread(
        target=demo.launch,
-         kwargs={"server_name": "0.0.0.0", "server_port": 7860}
+         kwargs={"server_name": "0.0.0.0", "server_port": 7860, "share": False}
    ).start()

+     # Start FastAPI
    uvicorn.run(app, host="0.0.0.0", port=8000)
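Note that the `@@ -82,7 +124,9 @@` hunk opens at `rag_chain = (` but shows only the chain's tail (`| StrOutputParser()`); the wiring between the prompt template and the parser sits outside the diff context. For orientation, a minimal sketch of a typical LCEL chain consistent with the visible pieces (`retriever`, `template`, `llm_pipeline`) follows. The `HuggingFacePipeline` wrapper and the `format_docs` helper are illustrative assumptions, not necessarily what app.py actually does:

from langchain_community.llms import HuggingFacePipeline
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Assumed wrapper: exposes the transformers pipeline as a LangChain LLM
llm = HuggingFacePipeline(pipeline=llm_pipeline)
prompt = PromptTemplate.from_template(template)

def format_docs(docs):
    # Hypothetical helper: join retrieved documents into one context string
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

Piping `retriever | format_docs` coerces the plain function into a RunnableLambda, so the retrieved documents reach the `{context}` slot as a single string rather than a raw document list.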
 
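For testing the FastAPI side: `chat_endpoint` declares `question: str` (which FastAPI treats as a query parameter) and reads the key from the `authorization` header, and the handler accumulates `rag_chain.stream(...)` chunks server-side, so the client receives one complete JSON object rather than a stream. A hypothetical client call follows; the `/chat` path and the POST method are assumptions, since the route decorator sits outside the diff context:

import requests

resp = requests.post(
    "http://localhost:8000/chat",  # assumed route; the decorator is not shown in the diff
    params={"question": "How do I block a lost debit card?"},
    headers={"Authorization": "Samson"},  # must match API_KEY from the CONFIGURATION block
)
print(resp.json()["response"])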