Aby committed on
Commit
7cfa59a
·
1 Parent(s): 20f5afd

clean code

Browse files
Files changed (1) hide show
  1. app.py +19 -61
app.py CHANGED
@@ -27,64 +27,12 @@ Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL, device="cpu"
27
  # phi3 LLm (downloads ~2GB on first use)
28
  # Model name and its tokenizer name are the same most of the times. check HF for tokenizer name if not found.
29
 
30
- def only_in_case_phi3_model_loading():
31
- # Pre-initialize tokenizer to ensure pad_token is set correctly
32
- tokenizer = AutoTokenizer.from_pretrained(
33
- MODEL_NAME,
34
- trust_remote_code=True,
35
- padding_side="left"
36
- )
37
- # Ensure pad_token is set for Phi-3
38
- if tokenizer.pad_token is None:
39
- tokenizer.pad_token = tokenizer.eos_token
40
- tokenizer.pad_token_id = tokenizer.eos_token_id
41
-
42
- # Create HuggingFaceLLM - try with tokenizer parameter first
43
- try:
44
- llm = HuggingFaceLLM(
45
- model_name=MODEL_NAME,
46
- tokenizer_name=MODEL_NAME,
47
- context_window=4000,
48
- max_new_tokens=512,
49
- device_map="cpu",
50
- model_kwargs={
51
- "trust_remote_code": True,
52
- "low_cpu_mem_usage": True,
53
- "use_safetensors": True
54
- },
55
- tokenizer=tokenizer # Passing tokenizer avoids init error, but may fail later if not properly supported
56
- )
57
- except (TypeError, ValueError):
58
- # If tokenizer parameter not supported, use workaround with __dict__
59
- llm = HuggingFaceLLM(
60
- model_name=MODEL_NAME,
61
- tokenizer_name=MODEL_NAME,
62
- context_window=4000,
63
- max_new_tokens=512,
64
- device_map="cpu",
65
- model_kwargs={
66
- "trust_remote_code": True,
67
- "low_cpu_mem_usage": True,
68
- "use_safetensors": True
69
- },
70
- tokenizer_kwargs={
71
- "trust_remote_code": True,
72
- "padding_side": "left"
73
- }
74
- )
75
- # Bypass Pydantic's __setattr__ to set internal tokenizer attribute
76
- object.__setattr__(llm, '_tokenizer', tokenizer)
77
-
78
- return llm
79
-
80
- # llm = only_in_case_phi3_model_loading()
81
- if (1==1):
82
- llm = HuggingFaceLLM(
83
- model_name=MODEL_NAME,
84
- tokenizer_name=MODEL_NAME,
85
- context_window=32768,
86
- max_new_tokens=512,
87
- device_map="cpu")
88
 
89
  qa_prompt = PromptTemplate(
90
  """<|im_start|>system
@@ -115,6 +63,10 @@ class ConstitutionRAGChatBot:
115
  self.index = load_index_from_storage(storage_context)
116
 
117
  self.query_engine = self.index.as_query_engine(llm=llm, chat_mode=True, similarity_top_k=TOP_K, response_mode="compact", text_qa_template=qa_prompt, memory=ChatMemoryBuffer.from_defaults(token_limit=MAX_HISTORY_TOKENS))
 
 
 
 
118
 
119
  def preprocess_query(self, query: str) -> str:
120
  """ Preprocess user query to improve accuracy. """
@@ -126,11 +78,16 @@ class ConstitutionRAGChatBot:
126
  """ Callback """
127
  if not message.strip():
128
  return "Please, Stick to the questions regarding the Constitutions. Thanks!"
129
-
 
 
 
130
  try:
131
  clean_query = self.preprocess_query(message)
132
  # query RAG (auto embed, retrives, generate)
133
  response = self.query_engine.query(clean_query)
 
 
134
 
135
  if "Not Found" in response.response.lower():
136
  return "Its my Bad. Might be there is no information on this topic into the constitution of India or Legal language is too hard for me too.. ;)"
@@ -162,5 +119,6 @@ def create_demo():
162
  if __name__ == "__main__":
163
  # Local test
164
  demo = create_demo()
165
- #demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
166
- demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
 
27
  # phi3 LLm (downloads ~2GB on first use)
28
  # Model name and its tokenizer name are the same most of the times. check HF for tokenizer name if not found.
29
 
30
+ llm = HuggingFaceLLM(
31
+ model_name=MODEL_NAME,
32
+ tokenizer_name=MODEL_NAME,
33
+ context_window=32768,
34
+ max_new_tokens=512,
35
+ device_map="cpu")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  qa_prompt = PromptTemplate(
38
  """<|im_start|>system
 
63
  self.index = load_index_from_storage(storage_context)
64
 
65
  self.query_engine = self.index.as_query_engine(llm=llm, chat_mode=True, similarity_top_k=TOP_K, response_mode="compact", text_qa_template=qa_prompt, memory=ChatMemoryBuffer.from_defaults(token_limit=MAX_HISTORY_TOKENS))
66
+ # self.chat_engine = self.index.as_chat_engine(
67
+ # chat_mode="context",
68
+ # query_engine=self.query_engine, # Injects your custom prompt + settings
69
+ # memory=ChatMemoryBuffer.from_defaults(token_limit=MAX_HISTORY_TOKENS))
70
 
71
  def preprocess_query(self, query: str) -> str:
72
  """ Preprocess user query to improve accuracy. """
 
78
  """ Callback """
79
  if not message.strip():
80
  return "Please, Stick to the questions regarding the Constitutions. Thanks!"
81
+ # for user_msg, bot_msg in history[-3:]: # Last 3 exchanges
82
+ # print ('History:\n')
83
+ # print(user_msg,"\n", bot_msg)
84
+ # print ('Ends..:\n')
85
  try:
86
  clean_query = self.preprocess_query(message)
87
  # query RAG (auto embed, retrieves, generate)
88
  response = self.query_engine.query(clean_query)
89
+ # response = self.chat_engine.chat(clean_query)
90
+ # print(f"📜 Retrieved context: {response.get_formatted_sources()}")
91
 
92
  if "Not Found" in response.response.lower():
93
  return "Its my Bad. Might be there is no information on this topic into the constitution of India or Legal language is too hard for me too.. ;)"
 
119
  if __name__ == "__main__":
120
  # Local test
121
  demo = create_demo()
122
+ # demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
123
+ # demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
124
+ demo.launch()