ICTuniverse committed on
Commit
32563c3
·
verified ·
1 Parent(s): 8c8b611

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -61
app.py CHANGED
@@ -1,79 +1,58 @@
1
- # import os
 
2
 
3
- # # Set Java paths manually
4
- # os.environ["JAVA_HOME"] = "/usr/local/lib/jvm/java-17-openjdk-amd64"
5
- # os.environ["JVM_PATH"] = "/usr/local/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so"
6
- # os.environ["PATH"] = os.environ["JAVA_HOME"] + "/bin:" + os.environ["PATH"]
7
 
8
- # from flask import Flask, request, jsonify
9
- # from sentence_transformers import CrossEncoder
10
- # import py_vncorenlp
11
 
 
12
 
13
- # app = Flask(__name__)
14
- # save_dir_vncore = "/home/user/app/vncorenlp"
15
- # rdrsegmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir=save_dir_vncore)
16
 
 
 
 
17
 
18
- # # Load your cross-encoder model
19
- # model_name = "truong1301/reranker_pho_BLAI" # Replace with your actual model if different
20
- # cross_encoder = CrossEncoder(model_name, max_length=256, num_labels=1)
 
 
 
 
21
 
22
- # # Function to preprocess text with Vietnamese word segmentation
23
- # def preprocess_text(text):
24
- # if not text:
25
- # return text
26
- # segmented_text = rdrsegmenter.word_segment(text)
27
- # # Join tokenized sentences into a single string
28
- # return " ".join([" ".join(sentence) for sentence in segmented_text])
29
 
30
- # @app.route("/rerank", methods=["POST"])
31
- # def rerank():
32
- # try:
33
- # # Get JSON data from the request (query and list of documents)
34
- # data = request.get_json()
35
- # query = data.get("query", "")
36
- # documents = data.get("documents", [])
37
 
38
- # if not query or not documents:
39
- # return jsonify({"error": "Missing query or documents"}), 400
40
 
41
- # # Preprocess query and documents with vncorenlp
42
- # processed_query = preprocess_text(query)
43
- # processed_docs = [preprocess_text(doc) for doc in documents]
44
 
45
- # # Create pairs of query and documents for reranking
46
- # query_doc_pairs = [(processed_query, doc) for doc in processed_docs]
 
 
 
 
47
 
48
- # # Get reranking scores from the cross-encoder
49
- # scores = cross_encoder.predict(query_doc_pairs).tolist()
50
 
51
- # # Combine documents with their scores and sort
52
- # ranked_results = sorted(
53
- # [{"document": doc, "score": score} for doc, score in zip(documents, scores)],
54
- # key=lambda x: x["score"],
55
- # reverse=True
56
- # )
57
 
58
- # return jsonify({"results": ranked_results})
 
 
59
 
60
- # except Exception as e:
61
- # return jsonify({"error": str(e)}), 500
62
 
63
- # @app.route("/", methods=["GET"])
64
- # def health_check():
65
- # return jsonify({"status": "Server is running"}), 200
66
 
67
- # if __name__ == "__main__":
68
- # app.run(host="0.0.0.0", port=7860) # Default port for Hugging Face Spaces
69
- import os
70
- import subprocess
71
 
72
- # Find libjvm.so
73
- output = subprocess.run("find /usr/lib/jvm -name libjvm.so", shell=True, capture_output=True, text=True)
74
- print("🔍 Searching for libjvm.so...")
75
- print(output.stdout or "❌ libjvm.so not found!")
76
-
77
- # Print JAVA_HOME and PATH
78
- print(f"JAVA_HOME: {os.environ.get('JAVA_HOME', 'Not Set')}")
79
- print(f"PATH: {os.environ.get('PATH', 'Not Set')}")
 
1
+ from flask import Flask, request, jsonify
2
+ from sentence_transformers import CrossEncoder
3
 
 
 
 
 
4
 
 
 
 
5
 
6
+ app = Flask(__name__)
7
 
 
 
 
8
 
9
+ # Load your cross-encoder model
10
+ model_name = "truong1301/reranker_pho_BLAI" # Replace with your actual model if different
11
+ cross_encoder = CrossEncoder(model_name, max_length=256, num_labels=1)
12
 
13
+ # Function to preprocess text with Vietnamese word segmentation
14
+ def preprocess_text(text):
15
+ if not text:
16
+ return text
17
+ segmented_text = rdrsegmenter.word_segment(text)
18
+ # Join tokenized sentences into a single string
19
+ return " ".join([" ".join(sentence) for sentence in segmented_text])
20
 
21
+ @app.route("/rerank", methods=["POST"])
22
+ def rerank():
23
+ try:
24
+ # Get JSON data from the request (query and list of documents)
25
+ data = request.get_json()
26
+ query = data.get("query", "")
27
+ documents = data.get("documents", [])
28
 
29
+ if not query or not documents:
30
+ return jsonify({"error": "Missing query or documents"}), 400
 
 
 
 
 
31
 
32
+ # Create pairs of query and documents for reranking
33
+ query_doc_pairs = [(query, doc) for doc in documents]
34
 
35
+ # Get reranking scores from the cross-encoder
36
+ scores = cross_encoder.predict(query_doc_pairs).tolist()
 
37
 
38
+ # Combine documents with their scores and sort
39
+ ranked_results = sorted(
40
+ [{"document": doc, "score": score} for doc, score in zip(documents, scores)],
41
+ key=lambda x: x["score"],
42
+ reverse=True
43
+ )
44
 
45
+ return jsonify({"results": ranked_results})
 
46
 
47
+ except Exception as e:
48
+ return jsonify({"error": str(e)}), 500
 
 
 
 
49
 
50
+ @app.route("/", methods=["GET"])
51
+ def health_check():
52
+ return jsonify({"status": "Server is running"}), 200
53
 
54
+ if __name__ == "__main__":
55
+ app.run(host="0.0.0.0", port=7860) # Default port for Hugging Face Spaces
56
 
 
 
 
57
 
 
 
 
 
58