ICTuniverse committed on
Commit
4f56acc
·
verified ·
1 Parent(s): 8bd76ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -51
app.py CHANGED
@@ -1,68 +1,79 @@
1
- import os
 
 
 
 
 
2
 
3
- # Set Java paths manually
4
- os.environ["JAVA_HOME"] = "/usr/local/lib/jvm/java-17-openjdk-amd64"
5
- os.environ["JVM_PATH"] = "/usr/local/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so"
6
- os.environ["PATH"] = os.environ["JAVA_HOME"] + "/bin:" + os.environ["PATH"]
7
 
8
- from flask import Flask, request, jsonify
9
- from sentence_transformers import CrossEncoder
10
- import py_vncorenlp
11
 
 
 
 
12
 
13
- app = Flask(__name__)
14
- save_dir_vncore = "/home/user/app/vncorenlp"
15
- rdrsegmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir=save_dir_vncore)
16
 
 
 
 
17
 
18
- # Load your cross-encoder model
19
- model_name = "truong1301/reranker_pho_BLAI" # Replace with your actual model if different
20
- cross_encoder = CrossEncoder(model_name, max_length=256, num_labels=1)
 
 
 
 
21
 
22
- # Function to preprocess text with Vietnamese word segmentation
23
- def preprocess_text(text):
24
- if not text:
25
- return text
26
- segmented_text = rdrsegmenter.word_segment(text)
27
- # Join tokenized sentences into a single string
28
- return " ".join([" ".join(sentence) for sentence in segmented_text])
29
 
30
- @app.route("/rerank", methods=["POST"])
31
- def rerank():
32
- try:
33
- # Get JSON data from the request (query and list of documents)
34
- data = request.get_json()
35
- query = data.get("query", "")
36
- documents = data.get("documents", [])
37
 
38
- if not query or not documents:
39
- return jsonify({"error": "Missing query or documents"}), 400
 
40
 
41
- # Preprocess query and documents with vncorenlp
42
- processed_query = preprocess_text(query)
43
- processed_docs = [preprocess_text(doc) for doc in documents]
44
 
45
- # Create pairs of query and documents for reranking
46
- query_doc_pairs = [(processed_query, doc) for doc in processed_docs]
47
 
48
- # Get reranking scores from the cross-encoder
49
- scores = cross_encoder.predict(query_doc_pairs).tolist()
 
 
 
 
50
 
51
- # Combine documents with their scores and sort
52
- ranked_results = sorted(
53
- [{"document": doc, "score": score} for doc, score in zip(documents, scores)],
54
- key=lambda x: x["score"],
55
- reverse=True
56
- )
57
 
58
- return jsonify({"results": ranked_results})
 
59
 
60
- except Exception as e:
61
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
62
 
63
- @app.route("/", methods=["GET"])
64
- def health_check():
65
- return jsonify({"status": "Server is running"}), 200
 
66
 
67
- if __name__ == "__main__":
68
- app.run(host="0.0.0.0", port=7860) # Default port for Hugging Face Spaces
 
 
1
+ # import os
2
+
3
+ # # Set Java paths manually
4
+ # os.environ["JAVA_HOME"] = "/usr/local/lib/jvm/java-17-openjdk-amd64"
5
+ # os.environ["JVM_PATH"] = "/usr/local/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so"
6
+ # os.environ["PATH"] = os.environ["JAVA_HOME"] + "/bin:" + os.environ["PATH"]
7
 
8
+ # from flask import Flask, request, jsonify
9
+ # from sentence_transformers import CrossEncoder
10
+ # import py_vncorenlp
 
11
 
 
 
 
12
 
13
+ # app = Flask(__name__)
14
+ # save_dir_vncore = "/home/user/app/vncorenlp"
15
+ # rdrsegmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir=save_dir_vncore)
16
 
 
 
 
17
 
18
+ # # Load your cross-encoder model
19
+ # model_name = "truong1301/reranker_pho_BLAI" # Replace with your actual model if different
20
+ # cross_encoder = CrossEncoder(model_name, max_length=256, num_labels=1)
21
 
22
+ # # Function to preprocess text with Vietnamese word segmentation
23
+ # def preprocess_text(text):
24
+ # if not text:
25
+ # return text
26
+ # segmented_text = rdrsegmenter.word_segment(text)
27
+ # # Join tokenized sentences into a single string
28
+ # return " ".join([" ".join(sentence) for sentence in segmented_text])
29
 
30
+ # @app.route("/rerank", methods=["POST"])
31
+ # def rerank():
32
+ # try:
33
+ # # Get JSON data from the request (query and list of documents)
34
+ # data = request.get_json()
35
+ # query = data.get("query", "")
36
+ # documents = data.get("documents", [])
37
 
38
+ # if not query or not documents:
39
+ # return jsonify({"error": "Missing query or documents"}), 400
 
 
 
 
 
40
 
41
+ # # Preprocess query and documents with vncorenlp
42
+ # processed_query = preprocess_text(query)
43
+ # processed_docs = [preprocess_text(doc) for doc in documents]
44
 
45
+ # # Create pairs of query and documents for reranking
46
+ # query_doc_pairs = [(processed_query, doc) for doc in processed_docs]
 
47
 
48
+ # # Get reranking scores from the cross-encoder
49
+ # scores = cross_encoder.predict(query_doc_pairs).tolist()
50
 
51
+ # # Combine documents with their scores and sort
52
+ # ranked_results = sorted(
53
+ # [{"document": doc, "score": score} for doc, score in zip(documents, scores)],
54
+ # key=lambda x: x["score"],
55
+ # reverse=True
56
+ # )
57
 
58
+ # return jsonify({"results": ranked_results})
 
 
 
 
 
59
 
60
+ # except Exception as e:
61
+ # return jsonify({"error": str(e)}), 500
62
 
63
+ # @app.route("/", methods=["GET"])
64
+ # def health_check():
65
+ # return jsonify({"status": "Server is running"}), 200
66
+
67
+ # if __name__ == "__main__":
68
+ # app.run(host="0.0.0.0", port=7860) # Default port for Hugging Face Spaces
69
+ import os
70
+ import subprocess
71
 
72
+ # Find libjvm.so
73
+ output = subprocess.run("find /usr/lib/jvm -name libjvm.so", shell=True, capture_output=True, text=True)
74
+ print("🔍 Searching for libjvm.so...")
75
+ print(output.stdout or "❌ libjvm.so not found!")
76
 
77
+ # Print JAVA_HOME and PATH
78
+ print(f"JAVA_HOME: {os.environ.get('JAVA_HOME', 'Not Set')}")
79
+ print(f"PATH: {os.environ.get('PATH', 'Not Set')}")