DrPie committed on
Commit
ce128fd
·
verified ·
1 Parent(s): d80e6d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -13
app.py CHANGED
@@ -12,13 +12,12 @@ os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"
12
  os.environ["HOME"] = "/tmp"
13
  for p in ["/tmp/hf_home","/tmp/hf_cache","/tmp/hf_datasets","/tmp/.cache"]:
14
  os.makedirs(p, exist_ok=True)
15
- # Xóa cache cũ nếu có
16
  shutil.rmtree("/.cache", ignore_errors=True)
17
 
18
  # =================== #
19
  # Import thư viện #
20
  # =================== #
21
- import time, hashlib, gzip, pickle, json, traceback, re
22
  from flask import Flask, request, jsonify
23
  from flask_cors import CORS
24
  import numpy as np
@@ -75,16 +74,12 @@ except Exception as e:
75
  # ================ #
76
  faiss_index = faiss.read_index(FAISS_PATH)
77
  with gzip.open(BM25_PATH,"rb") as f: bm25 = pickle.load(f)
78
- with gzip.open(METAS_PATH,"rb") as f:
79
- metas = pickle.load(f)
80
  if isinstance(metas,dict) and "corpus" in metas:
81
  corpus = metas["corpus"]
82
  else:
83
  corpus = metas
84
 
85
- # Lưu list key để tránh tạo lại nhiều lần
86
- meta_keys = list(range(len(corpus)))
87
-
88
  # Load embedding model
89
  device = os.environ.get("DEVICE","cpu")
90
  embedding_model = SentenceTransformer(EMB_MODEL, device=device)
@@ -107,8 +102,6 @@ if API_KEY:
107
  except Exception as e:
108
  print("Warning: cannot init GenAI:", e)
109
 
110
- answer_cache = TTLCache(maxsize=CACHE_MAX, ttl=CACHE_TTL)
111
-
112
  # =================== #
113
  # Utility / Retrieve #
114
  # =================== #
@@ -117,7 +110,6 @@ def minmax_scale(arr):
117
  return np.zeros_like(arr) if len(arr)==0 or np.max(arr)==np.min(arr) else (arr-np.min(arr))/(np.max(arr)-np.min(arr))
118
 
119
  def classify_followup(text:str)->int:
120
- # như code gốc, bỏ bớt regex nặng để nhanh hơn
121
  t=text.lower().strip()
122
  if len(t.split())<=4: return 0
123
  if re.search(r"\b(nó|cái này|thế thì|vậy thì)\b",t): return 0
@@ -152,12 +144,13 @@ def get_full_procedure_text_by_parent(pid):
152
  field_map={"ten_thu_tuc":"Tên thủ tục","cach_thuc_thuc_hien":"Cách thức thực hiện","thanh_phan_ho_so":"Thành phần hồ sơ","trinh_tu_thuc_hien":"Trình tự thực hiện","co_quan_thuc_hien":"Cơ quan thực hiện","yeu_cau_dieu_kien":"Yêu cầu, điều kiện","nguon":"Nguồn"}
153
  return "\n\n".join([f"{field_map[k]}:\n{v}" for k,v in rec.items() if k in field_map and v])
154
 
155
- # ================ #
156
- # Flask endpoints #
157
- # ================ #
158
  app=Flask(__name__)
159
  CORS(app)
160
  chat_histories={}
 
161
 
162
  @app.route("/health")
163
  def health(): return {"status":"ok"}
 
12
  os.environ["HOME"] = "/tmp"
13
  for p in ["/tmp/hf_home","/tmp/hf_cache","/tmp/hf_datasets","/tmp/.cache"]:
14
  os.makedirs(p, exist_ok=True)
 
15
  shutil.rmtree("/.cache", ignore_errors=True)
16
 
17
  # =================== #
18
  # Import thư viện #
19
  # =================== #
20
+ import gzip, pickle, json, re
21
  from flask import Flask, request, jsonify
22
  from flask_cors import CORS
23
  import numpy as np
 
74
  # ================ #
75
  faiss_index = faiss.read_index(FAISS_PATH)
76
  with gzip.open(BM25_PATH,"rb") as f: bm25 = pickle.load(f)
77
+ with gzip.open(METAS_PATH,"rb") as f: metas = pickle.load(f)
 
78
  if isinstance(metas,dict) and "corpus" in metas:
79
  corpus = metas["corpus"]
80
  else:
81
  corpus = metas
82
 
 
 
 
83
  # Load embedding model
84
  device = os.environ.get("DEVICE","cpu")
85
  embedding_model = SentenceTransformer(EMB_MODEL, device=device)
 
102
  except Exception as e:
103
  print("Warning: cannot init GenAI:", e)
104
 
 
 
105
  # =================== #
106
  # Utility / Retrieve #
107
  # =================== #
 
110
  return np.zeros_like(arr) if len(arr)==0 or np.max(arr)==np.min(arr) else (arr-np.min(arr))/(np.max(arr)-np.min(arr))
111
 
112
  def classify_followup(text:str)->int:
 
113
  t=text.lower().strip()
114
  if len(t.split())<=4: return 0
115
  if re.search(r"\b(nó|cái này|thế thì|vậy thì)\b",t): return 0
 
144
  field_map={"ten_thu_tuc":"Tên thủ tục","cach_thuc_thuc_hien":"Cách thức thực hiện","thanh_phan_ho_so":"Thành phần hồ sơ","trinh_tu_thuc_hien":"Trình tự thực hiện","co_quan_thuc_hien":"Cơ quan thực hiện","yeu_cau_dieu_kien":"Yêu cầu, điều kiện","nguon":"Nguồn"}
145
  return "\n\n".join([f"{field_map[k]}:\n{v}" for k,v in rec.items() if k in field_map and v])
146
 
147
+ # =================== #
148
+ # Flask endpoints #
149
+ # =================== #
150
  app=Flask(__name__)
151
  CORS(app)
152
  chat_histories={}
153
+ answer_cache = TTLCache(maxsize=CACHE_MAX, ttl=CACHE_TTL)
154
 
155
  @app.route("/health")
156
  def health(): return {"status":"ok"}