MyEnny committed on
Commit 43abb59 · verified · 1 Parent(s): eb3c7b8

Update app.py

Files changed (1)
  1. app.py +17 -81
app.py CHANGED
@@ -1,15 +1,8 @@
 import os
 import zipfile
-import torch
 import gradio as gr
 
-from transformers import (
-    AutoTokenizer,
-    AutoModelForCausalLM,
-    BitsAndBytesConfig,
-    pipeline,
-)
-
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.llms import HuggingFacePipeline
@@ -23,108 +16,52 @@ if not os.path.exists("faiss_index") and os.path.exists("faiss_index.zip"):
         zip_ref.extractall(".")
 
 # --- Step 2: Load embedding and vectorstore ---
-embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-vectordb = FAISS.load_local("faiss_index", embedding_model, allow_dangerous_deserialization=True)
+embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
+vectordb = FAISS.load_local("faiss_index", embedding_model,allow_dangerous_deserialization=True)
 
-# --- Step 3: Load the LLM (memory-efficient) ---
+# --- Step 3: Load the LLM ---
 model_id = "tiiuae/falcon3-1b-instruct"
-
-tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
-
-# 4-bit quantisation to stay within L4 memory
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-)
-
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    quantization_config=bnb_config,
-    device_map="auto",          # place layers on GPU/CPU automatically if needed
-    torch_dtype=torch.float16,  # keeps activation memory down
-    low_cpu_mem_usage=True,
-)
-model.eval()
-torch.set_grad_enabled(False)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id)
 
 pipe = pipeline(
-    task="text-generation",
+    "text-generation",
     model=model,
     tokenizer=tokenizer,
    pad_token_id=tokenizer.eos_token_id,
-    max_new_tokens=160,  # keep this modest to avoid spikes
+    max_new_tokens=200,
     do_sample=True,
-    temperature=0.7,
-    top_p=0.9,
+    temperature=1.0,
 )
-
 llm = HuggingFacePipeline(pipeline=pipe)
 
 # --- Step 4: Setup memory and QA chain ---
 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
 prompt = PromptTemplate.from_template("""
-You are a helpful assistant at the University of Hertfordshire.
-Use the provided context to create a **clear, concise, step-by-step answer** in friendly, student-friendly language.
-Do not copy the context verbatim—paraphrase where possible.
-Remove any irrelevant details.
-If the answer is not in the context, reply: "I don't know."
-
-Format the answer like this:
-1) Step one...
-2) Step two...
-(Use numbered steps where possible.)
-
+You are a helpful assistant at the University of Hertfordshire. Use the context below to answer the question clearly and factually.
+If the answer is not in the context, say you don't know.
 
 Context:
 {context}
-
 Question:
 {question}
 
-Final refined answer:
-""".strip()
-)
+Helpful Answer:
+""")
 
-def refine_answer(raw_answer):
-    # Remove extra markers
-    text = raw_answer.strip()
-    text = text.replace("Helpful answer:", "").strip()
-    # Capitalise first letter if missing
-    if text and not text[0].isupper():
-        text = text[0].upper() + text[1:]
-    return text
-
 qa_chain = ConversationalRetrievalChain.from_llm(
     llm=llm,
     retriever=vectordb.as_retriever(search_kwargs={"k": 3}),
     memory=memory,
     chain_type="stuff",
-    combine_docs_chain_kwargs={"prompt": prompt},
+    combine_docs_chain_kwargs={"prompt": prompt}
 )
 
 UH_LOGO = "images/UH.png"
-
 # --- Step 5: Define chatbot logic ---
-
-def refine_answer(raw_answer: str) -> str:
-    """Clean and polish raw model output."""
-    text = raw_answer.strip()
-    # Remove prompt artifacts
-    for marker in ["Helpful answer:", "<|assistant|>", "Refined helpful answer:"]:
-        text = text.replace(marker, "")
-    # Normalise spaces
-    text = " ".join(text.split())
-    return text
-
 def chat(message, history):
     result = qa_chain.invoke({"question": message})
-    response = refine_answer(result.get("answer", ""))
-
-    # keep GPU clean between turns (helps on Spaces)
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
+    response = result.get("answer", "")
+    response = response.split("Answer:")[-1].replace("<|assistant|>", "").strip()
     return response
 
 # --- Step 6: UI ---
@@ -156,5 +93,4 @@ with gr.Blocks() as demo:
     submit.click(respond, [txt, chatbot], [txt, chatbot])
     txt.submit(respond, [txt, chatbot], [txt, chatbot])
 
-if __name__ == "__main__":
-    demo.launch()
+demo.launch()
 
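For reference: the app only unzips and loads faiss_index, so the index itself has to be built offline beforehand. A minimal sketch of that step, assuming the same MiniLM embedding model; the corpus, chunk_size and chunk_overlap are placeholders, not taken from this repo.

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Build the index the app later loads with FAISS.load_local("faiss_index", ...).
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
texts = ["Enrolment opens two weeks before term starts."]  # placeholder corpus
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.create_documents(texts)
vectordb = FAISS.from_documents(chunks, embedding_model)
vectordb.save_local("faiss_index")  # zip this folder to produce faiss_index.zip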
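The commit replaces the 4-bit BitsAndBytesConfig load with a plain from_pretrained, trading memory headroom for simplicity (no torch or bitsandbytes dependency). If memory pressure returns, a middle ground is a half-precision load without quantisation; a sketch, not what this commit does:

import torch
from transformers import AutoModelForCausalLM

# Half-precision load: roughly halves weight memory vs. the new fp32 default,
# without reintroducing bitsandbytes. device_map="auto" requires accelerate.
model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon3-1b-instruct",
    torch_dtype=torch.float16,
    device_map="auto",
)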
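A quick way to sanity-check the new sampling settings (temperature raised to 1.0, top_p dropped) before the chain is wired up; the prompt below is arbitrary:

# Raw transformers pipeline: returns a list of dicts with "generated_text".
print(pipe("Q: What is FAISS?\nA:")[0]["generated_text"])

# Same call through the LangChain wrapper used below.
print(llm("Q: What is FAISS?\nA:"))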
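Because ConversationBufferMemory is attached, each invoke appends to chat_history, so a follow-up question is condensed against earlier turns before retrieval. A smoke test with placeholder questions:

# First turn populates the memory; the second can rely on it for context.
out = qa_chain.invoke({"question": "How do I enrol?"})
print(out["answer"])
out = qa_chain.invoke({"question": "What documents do I need for it?"})
print(out["answer"])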
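The split on "Answer:" in the new chat() works because the prompt now ends with "Helpful Answer:" and a text-generation pipeline typically echoes the prompt ahead of the completion; taking the last segment keeps only the generated part. On a made-up raw output:

# Illustration of the post-processing, with a fabricated raw string.
raw = "Context: ...\nQuestion: When does term start?\nHelpful Answer: Term starts on 22 September.<|assistant|>"
cleaned = raw.split("Answer:")[-1].replace("<|assistant|>", "").strip()
print(cleaned)  # Term starts on 22 September.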
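The Blocks body sits outside the hunks, so respond, txt, chatbot and submit are only visible where they are wired up. A hypothetical shape consistent with those lines (layout and component arguments are assumptions, not from this diff):

with gr.Blocks() as demo:
    gr.Image(UH_LOGO, show_label=False)  # assumed placement of the logo
    chatbot = gr.Chatbot()
    txt = gr.Textbox(placeholder="Ask a question")
    submit = gr.Button("Send")

    def respond(message, chat_history):
        # Route the message through the QA chain, then update the chat box.
        answer = chat(message, chat_history)
        chat_history.append((message, answer))
        return "", chat_history

    submit.click(respond, [txt, chatbot], [txt, chatbot])
    txt.submit(respond, [txt, chatbot], [txt, chatbot])

demo.launch()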