Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,13 +12,12 @@ os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"
|
|
| 12 |
os.environ["HOME"] = "/tmp"
|
| 13 |
for p in ["/tmp/hf_home","/tmp/hf_cache","/tmp/hf_datasets","/tmp/.cache"]:
|
| 14 |
os.makedirs(p, exist_ok=True)
|
| 15 |
-
# Xóa cache cũ nếu có
|
| 16 |
shutil.rmtree("/.cache", ignore_errors=True)
|
| 17 |
|
| 18 |
# =================== #
|
| 19 |
# Import thư viện #
|
| 20 |
# =================== #
|
| 21 |
-
import
|
| 22 |
from flask import Flask, request, jsonify
|
| 23 |
from flask_cors import CORS
|
| 24 |
import numpy as np
|
|
@@ -75,16 +74,12 @@ except Exception as e:
|
|
| 75 |
# ================ #
|
| 76 |
faiss_index = faiss.read_index(FAISS_PATH)
|
| 77 |
with gzip.open(BM25_PATH,"rb") as f: bm25 = pickle.load(f)
|
| 78 |
-
with gzip.open(METAS_PATH,"rb") as f:
|
| 79 |
-
metas = pickle.load(f)
|
| 80 |
if isinstance(metas,dict) and "corpus" in metas:
|
| 81 |
corpus = metas["corpus"]
|
| 82 |
else:
|
| 83 |
corpus = metas
|
| 84 |
|
| 85 |
-
# Lưu list key để tránh tạo lại nhiều lần
|
| 86 |
-
meta_keys = list(range(len(corpus)))
|
| 87 |
-
|
| 88 |
# Load embedding model
|
| 89 |
device = os.environ.get("DEVICE","cpu")
|
| 90 |
embedding_model = SentenceTransformer(EMB_MODEL, device=device)
|
|
@@ -107,8 +102,6 @@ if API_KEY:
|
|
| 107 |
except Exception as e:
|
| 108 |
print("Warning: cannot init GenAI:", e)
|
| 109 |
|
| 110 |
-
answer_cache = TTLCache(maxsize=CACHE_MAX, ttl=CACHE_TTL)
|
| 111 |
-
|
| 112 |
# =================== #
|
| 113 |
# Utility / Retrieve #
|
| 114 |
# =================== #
|
|
@@ -117,7 +110,6 @@ def minmax_scale(arr):
|
|
| 117 |
return np.zeros_like(arr) if len(arr)==0 or np.max(arr)==np.min(arr) else (arr-np.min(arr))/(np.max(arr)-np.min(arr))
|
| 118 |
|
| 119 |
def classify_followup(text:str)->int:
|
| 120 |
-
# như code gốc, bỏ bớt regex nặng để nhanh hơn
|
| 121 |
t=text.lower().strip()
|
| 122 |
if len(t.split())<=4: return 0
|
| 123 |
if re.search(r"\b(nó|cái này|thế thì|vậy thì)\b",t): return 0
|
|
@@ -152,12 +144,13 @@ def get_full_procedure_text_by_parent(pid):
|
|
| 152 |
field_map={"ten_thu_tuc":"Tên thủ tục","cach_thuc_thuc_hien":"Cách thức thực hiện","thanh_phan_ho_so":"Thành phần hồ sơ","trinh_tu_thuc_hien":"Trình tự thực hiện","co_quan_thuc_hien":"Cơ quan thực hiện","yeu_cau_dieu_kien":"Yêu cầu, điều kiện","nguon":"Nguồn"}
|
| 153 |
return "\n\n".join([f"{field_map[k]}:\n{v}" for k,v in rec.items() if k in field_map and v])
|
| 154 |
|
| 155 |
-
#
|
| 156 |
-
# Flask endpoints
|
| 157 |
-
#
|
| 158 |
app=Flask(__name__)
|
| 159 |
CORS(app)
|
| 160 |
chat_histories={}
|
|
|
|
| 161 |
|
| 162 |
@app.route("/health")
|
| 163 |
def health(): return {"status":"ok"}
|
|
|
|
| 12 |
os.environ["HOME"] = "/tmp"
|
| 13 |
for p in ["/tmp/hf_home","/tmp/hf_cache","/tmp/hf_datasets","/tmp/.cache"]:
|
| 14 |
os.makedirs(p, exist_ok=True)
|
|
|
|
| 15 |
shutil.rmtree("/.cache", ignore_errors=True)
|
| 16 |
|
| 17 |
# =================== #
|
| 18 |
# Import thư viện #
|
| 19 |
# =================== #
|
| 20 |
+
import gzip, pickle, json, re
|
| 21 |
from flask import Flask, request, jsonify
|
| 22 |
from flask_cors import CORS
|
| 23 |
import numpy as np
|
|
|
|
| 74 |
# ================ #
|
| 75 |
faiss_index = faiss.read_index(FAISS_PATH)
|
| 76 |
with gzip.open(BM25_PATH,"rb") as f: bm25 = pickle.load(f)
|
| 77 |
+
with gzip.open(METAS_PATH,"rb") as f: metas = pickle.load(f)
|
|
|
|
| 78 |
if isinstance(metas,dict) and "corpus" in metas:
|
| 79 |
corpus = metas["corpus"]
|
| 80 |
else:
|
| 81 |
corpus = metas
|
| 82 |
|
|
|
|
|
|
|
|
|
|
| 83 |
# Load embedding model
|
| 84 |
device = os.environ.get("DEVICE","cpu")
|
| 85 |
embedding_model = SentenceTransformer(EMB_MODEL, device=device)
|
|
|
|
| 102 |
except Exception as e:
|
| 103 |
print("Warning: cannot init GenAI:", e)
|
| 104 |
|
|
|
|
|
|
|
| 105 |
# =================== #
|
| 106 |
# Utility / Retrieve #
|
| 107 |
# =================== #
|
|
|
|
| 110 |
return np.zeros_like(arr) if len(arr)==0 or np.max(arr)==np.min(arr) else (arr-np.min(arr))/(np.max(arr)-np.min(arr))
|
| 111 |
|
| 112 |
def classify_followup(text:str)->int:
|
|
|
|
| 113 |
t=text.lower().strip()
|
| 114 |
if len(t.split())<=4: return 0
|
| 115 |
if re.search(r"\b(nó|cái này|thế thì|vậy thì)\b",t): return 0
|
|
|
|
| 144 |
field_map={"ten_thu_tuc":"Tên thủ tục","cach_thuc_thuc_hien":"Cách thức thực hiện","thanh_phan_ho_so":"Thành phần hồ sơ","trinh_tu_thuc_hien":"Trình tự thực hiện","co_quan_thuc_hien":"Cơ quan thực hiện","yeu_cau_dieu_kien":"Yêu cầu, điều kiện","nguon":"Nguồn"}
|
| 145 |
return "\n\n".join([f"{field_map[k]}:\n{v}" for k,v in rec.items() if k in field_map and v])
|
| 146 |
|
| 147 |
+
# =================== #
|
| 148 |
+
# Flask endpoints #
|
| 149 |
+
# =================== #
|
| 150 |
app=Flask(__name__)
|
| 151 |
CORS(app)
|
| 152 |
chat_histories={}
|
| 153 |
+
answer_cache = TTLCache(maxsize=CACHE_MAX, ttl=CACHE_TTL)
|
| 154 |
|
| 155 |
@app.route("/health")
|
| 156 |
def health(): return {"status":"ok"}
|