Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -1,15 +1,8 @@
 import os
 import zipfile
-import torch
 import gradio as gr
 
-from transformers import (
-    AutoTokenizer,
-    AutoModelForCausalLM,
-    BitsAndBytesConfig,
-    pipeline,
-)
-
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.llms import HuggingFacePipeline
@@ -23,108 +16,52 @@ if not os.path.exists("faiss_index") and os.path.exists("faiss_index.zip"):
         zip_ref.extractall(".")
 
 # --- Step 2: Load embedding and vectorstore ---
-embedding_model = HuggingFaceEmbeddings(model_name=
-vectordb = FAISS.load_local("faiss_index", embedding_model,
+embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
+vectordb = FAISS.load_local("faiss_index", embedding_model, allow_dangerous_deserialization=True)
 
-# --- Step 3: Load the LLM
+# --- Step 3: Load the LLM ---
 model_id = "tiiuae/falcon3-1b-instruct"
-
-
-
-# 4-bit quantisation to stay within L4 memory
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-)
-
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    quantization_config=bnb_config,
-    device_map="auto",           # place layers on GPU/CPU automatically if needed
-    torch_dtype=torch.float16,   # keeps activation memory down
-    low_cpu_mem_usage=True,
-)
-model.eval()
-torch.set_grad_enabled(False)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id)
 
 pipe = pipeline(
-
+    "text-generation",
     model=model,
     tokenizer=tokenizer,
     pad_token_id=tokenizer.eos_token_id,
-    max_new_tokens=
+    max_new_tokens=200,
     do_sample=True,
-    temperature=0
-    top_p=0.9,
+    temperature=1.0,
 )
-
 llm = HuggingFacePipeline(pipeline=pipe)
 
 # --- Step 4: Setup memory and QA chain ---
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
 prompt = PromptTemplate.from_template("""
-You are a helpful assistant at the University of Hertfordshire.
-
-Do not copy the context verbatim—paraphrase where possible.
-Remove any irrelevant details.
-If the answer is not in the context, reply: "I don't know."
-
-Format the answer like this:
-1) Step one...
-2) Step two...
-(Use numbered steps where possible.)
-
+You are a helpful assistant at the University of Hertfordshire. Use the context below to answer the question clearly and factually.
+If the answer is not in the context, say you don't know.
 Context:
 {context}
-
 Question:
 {question}
-
-Final refined answer:
-""".strip()
-)
+Helpful Answer:
+""")
-
-def refine_answer(raw_answer):
-    # Remove extra markers
-    text = raw_answer.strip()
-    text = text.replace("Helpful answer:", "").strip()
-    # Capitalise first letter if missing
-    if text and not text[0].isupper():
-        text = text[0].upper() + text[1:]
-    return text
 
 qa_chain = ConversationalRetrievalChain.from_llm(
     llm=llm,
     retriever=vectordb.as_retriever(search_kwargs={"k": 3}),
     memory=memory,
     chain_type="stuff",
-    combine_docs_chain_kwargs={"prompt": prompt}
+    combine_docs_chain_kwargs={"prompt": prompt}
 )
 
 UH_LOGO = "images/UH.png"
-
 # --- Step 5: Define chatbot logic ---
-
-def refine_answer(raw_answer: str) -> str:
-    """Clean and polish raw model output."""
-    text = raw_answer.strip()
-    # Remove prompt artifacts
-    for marker in ["Helpful answer:", "<|assistant|>", "Refined helpful answer:"]:
-        text = text.replace(marker, "")
-    # Normalise spaces
-    text = " ".join(text.split())
-    return text
-
 def chat(message, history):
     result = qa_chain.invoke({"question": message})
-    response =
-
-    # keep GPU clean between turns (helps on Spaces)
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
+    response = result.get("answer", "")
+    response = response.split("Answer:")[-1].replace("<|assistant|>", "").strip()
     return response
 
 # --- Step 6: UI ---
@@ -156,5 +93,4 @@ with gr.Blocks() as demo:
     submit.click(respond, [txt, chatbot], [txt, chatbot])
     txt.submit(respond, [txt, chatbot], [txt, chatbot])
 
-
-demo.launch()
+demo.launch()
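A note on the fix: the diff removes every GPU-only dependency (torch, BitsAndBytesConfig, the CUDA cache cleanup in chat) and loads the model in full precision, which is a plausible remedy for the Space's runtime error on hardware without CUDA or bitsandbytes. The added allow_dangerous_deserialization=True flag is required by recent LangChain releases when unpickling a locally saved FAISS index, and is acceptable here because the index ships inside the Space's own faiss_index.zip. A minimal smoke test of the simplified load path, using only the model ID and generation settings visible in the diff (the test prompt string is an arbitrary example):

# Smoke test for the simplified, CPU-friendly load path introduced here.
# Model ID and generation settings are copied from the diff above.
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

model_id = "tiiuae/falcon3-1b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    pad_token_id=tokenizer.eos_token_id,
    max_new_tokens=200,
    do_sample=True,
    temperature=1.0,
)
# Returns a list of dicts with a "generated_text" key.
print(pipe("Question: What does UH stand for?\nHelpful Answer:")[0]["generated_text"])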
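For reference, the removed 4-bit path reassembles from the deleted lines as below. It needs a CUDA GPU plus the bitsandbytes package, and failing to find either at startup is consistent with the runtime error this commit addresses. Reconstructed from the diff, untested here:

# The quantised load path removed by this commit, reassembled from the
# deleted lines. Requires CUDA and bitsandbytes.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit quantisation to stay within L4 memory (as the original comment put it)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon3-1b-instruct",
    quantization_config=bnb_config,
    device_map="auto",           # place layers on GPU/CPU automatically if needed
    torch_dtype=torch.float16,   # keeps activation memory down
    low_cpu_mem_usage=True,
)
model.eval()
torch.set_grad_enabled(False)    # inference only, no autograd bookkeeping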
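The respond callback bound to submit.click and txt.submit sits in the unchanged lines between the hunks, so its body is not shown in this diff. A hypothetical sketch matching the [txt, chatbot] -> [txt, chatbot] signature, built on the chat function from the diff (the name, history format, and body are assumptions, not the Space's actual code):

# Hypothetical respond() matching the Gradio bindings in the diff: it takes
# the textbox value and chat history, and returns a cleared textbox plus the
# updated history.
def respond(message, chat_history):
    answer = chat(message, chat_history)               # run the retrieval QA chain
    chat_history = chat_history + [(message, answer)]  # tuple-style chat history
    return "", chat_history                            # "" clears the textbox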