Aby committed on
Commit ·
7cfa59a
1
Parent(s): 20f5afd
clean code
Browse files
app.py
CHANGED
|
@@ -27,64 +27,12 @@ Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL, device="cpu"
|
|
| 27 |
# phi3 LLM (downloads ~2GB on first use)
|
| 28 |
# The model name and its tokenizer name are usually the same. Check HF for the tokenizer name if it is not found.
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
)
|
| 37 |
-
# Ensure pad_token is set for Phi-3
|
| 38 |
-
if tokenizer.pad_token is None:
|
| 39 |
-
tokenizer.pad_token = tokenizer.eos_token
|
| 40 |
-
tokenizer.pad_token_id = tokenizer.eos_token_id
|
| 41 |
-
|
| 42 |
-
# Create HuggingFaceLLM - try with tokenizer parameter first
|
| 43 |
-
try:
|
| 44 |
-
llm = HuggingFaceLLM(
|
| 45 |
-
model_name=MODEL_NAME,
|
| 46 |
-
tokenizer_name=MODEL_NAME,
|
| 47 |
-
context_window=4000,
|
| 48 |
-
max_new_tokens=512,
|
| 49 |
-
device_map="cpu",
|
| 50 |
-
model_kwargs={
|
| 51 |
-
"trust_remote_code": True,
|
| 52 |
-
"low_cpu_mem_usage": True,
|
| 53 |
-
"use_safetensors": True
|
| 54 |
-
},
|
| 55 |
-
tokenizer=tokenizer # Passing tokenizer avoids init error, but may fail later if not properly supported
|
| 56 |
-
)
|
| 57 |
-
except (TypeError, ValueError):
|
| 58 |
-
# If tokenizer parameter not supported, use workaround with __dict__
|
| 59 |
-
llm = HuggingFaceLLM(
|
| 60 |
-
model_name=MODEL_NAME,
|
| 61 |
-
tokenizer_name=MODEL_NAME,
|
| 62 |
-
context_window=4000,
|
| 63 |
-
max_new_tokens=512,
|
| 64 |
-
device_map="cpu",
|
| 65 |
-
model_kwargs={
|
| 66 |
-
"trust_remote_code": True,
|
| 67 |
-
"low_cpu_mem_usage": True,
|
| 68 |
-
"use_safetensors": True
|
| 69 |
-
},
|
| 70 |
-
tokenizer_kwargs={
|
| 71 |
-
"trust_remote_code": True,
|
| 72 |
-
"padding_side": "left"
|
| 73 |
-
}
|
| 74 |
-
)
|
| 75 |
-
# Bypass Pydantic's __setattr__ to set internal tokenizer attribute
|
| 76 |
-
object.__setattr__(llm, '_tokenizer', tokenizer)
|
| 77 |
-
|
| 78 |
-
return llm
|
| 79 |
-
|
| 80 |
-
# llm = only_in_case_phi3_model_loading()
|
| 81 |
-
if (1==1):
|
| 82 |
-
llm = HuggingFaceLLM(
|
| 83 |
-
model_name=MODEL_NAME,
|
| 84 |
-
tokenizer_name=MODEL_NAME,
|
| 85 |
-
context_window=32768,
|
| 86 |
-
max_new_tokens=512,
|
| 87 |
-
device_map="cpu")
|
| 88 |
|
| 89 |
qa_prompt = PromptTemplate(
|
| 90 |
"""<|im_start|>system
|
|
@@ -115,6 +63,10 @@ class ConstitutionRAGChatBot:
|
|
| 115 |
self.index = load_index_from_storage(storage_context)
|
| 116 |
|
| 117 |
self.query_engine = self.index.as_query_engine(llm=llm, chat_mode=True, similarity_top_k=TOP_K, response_mode="compact", text_qa_template=qa_prompt, memory=ChatMemoryBuffer.from_defaults(token_limit=MAX_HISTORY_TOKENS))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
def preprocess_query(self, query: str) -> str:
|
| 120 |
""" Preprocess user query to improve accuracy. """
|
|
@@ -126,11 +78,16 @@ class ConstitutionRAGChatBot:
|
|
| 126 |
""" Callback """
|
| 127 |
if not message.strip():
|
| 128 |
return "Please, Stick to the questions regarding the Constitutions. Thanks!"
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
| 130 |
try:
|
| 131 |
clean_query = self.preprocess_query(message)
|
| 132 |
# query RAG (auto embeds, retrieves, generates)
|
| 133 |
response = self.query_engine.query(clean_query)
|
|
|
|
|
|
|
| 134 |
|
| 135 |
if "Not Found" in response.response.lower():
|
| 136 |
return "Its my Bad. Might be there is no information on this topic into the constitution of India or Legal language is too hard for me too.. ;)"
|
|
@@ -162,5 +119,6 @@ def create_demo():
|
|
| 162 |
if __name__ == "__main__":
|
| 163 |
# Local test
|
| 164 |
demo = create_demo()
|
| 165 |
-
#demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
|
| 166 |
-
demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
|
|
|
|
|
|
| 27 |
# phi3 LLM (downloads ~2GB on first use)
|
| 28 |
# The model name and its tokenizer name are usually the same. Check HF for the tokenizer name if it is not found.
|
| 29 |
|
| 30 |
+
llm = HuggingFaceLLM(
|
| 31 |
+
model_name=MODEL_NAME,
|
| 32 |
+
tokenizer_name=MODEL_NAME,
|
| 33 |
+
context_window=32768,
|
| 34 |
+
max_new_tokens=512,
|
| 35 |
+
device_map="cpu")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
qa_prompt = PromptTemplate(
|
| 38 |
"""<|im_start|>system
|
|
|
|
| 63 |
self.index = load_index_from_storage(storage_context)
|
| 64 |
|
| 65 |
self.query_engine = self.index.as_query_engine(llm=llm, chat_mode=True, similarity_top_k=TOP_K, response_mode="compact", text_qa_template=qa_prompt, memory=ChatMemoryBuffer.from_defaults(token_limit=MAX_HISTORY_TOKENS))
|
| 66 |
+
# self.chat_engine = self.index.as_chat_engine(
|
| 67 |
+
# chat_mode="context",
|
| 68 |
+
# query_engine=self.query_engine, # Injects your custom prompt + settings
|
| 69 |
+
# memory=ChatMemoryBuffer.from_defaults(token_limit=MAX_HISTORY_TOKENS))
|
| 70 |
|
| 71 |
def preprocess_query(self, query: str) -> str:
|
| 72 |
""" Preprocess user query to improve accuracy. """
|
|
|
|
| 78 |
""" Callback """
|
| 79 |
if not message.strip():
|
| 80 |
return "Please, Stick to the questions regarding the Constitutions. Thanks!"
|
| 81 |
+
# for user_msg, bot_msg in history[-3:]: # Last 3 exchanges
|
| 82 |
+
# print ('History:\n')
|
| 83 |
+
# print(user_msg,"\n", bot_msg)
|
| 84 |
+
# print ('Ends..:\n')
|
| 85 |
try:
|
| 86 |
clean_query = self.preprocess_query(message)
|
| 87 |
# query RAG (auto embeds, retrieves, generates)
|
| 88 |
response = self.query_engine.query(clean_query)
|
| 89 |
+
# response = self.chat_engine.chat(clean_query)
|
| 90 |
+
# print(f"📜 Retrieved context: {response.get_formatted_sources()}")
|
| 91 |
|
| 92 |
if "Not Found" in response.response.lower():
|
| 93 |
return "Its my Bad. Might be there is no information on this topic into the constitution of India or Legal language is too hard for me too.. ;)"
|
|
|
|
| 119 |
if __name__ == "__main__":
|
| 120 |
# Local test
|
| 121 |
demo = create_demo()
|
| 122 |
+
# demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
|
| 123 |
+
# demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
|
| 124 |
+
demo.launch()
|