Spaces:

ICTuniverse
/

reranking

Sleeping

App Files Files Community

ICTuniverse committed on Mar 13, 2025

Commit

32563c3

verified ·

1 Parent(s): 8c8b611

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -61

app.py CHANGED Viewed

@@ -1,79 +1,58 @@
-# import os
-# # Set Java paths manually
-# os.environ["JAVA_HOME"] = "/usr/local/lib/jvm/java-17-openjdk-amd64"
-# os.environ["JVM_PATH"] = "/usr/local/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so"
-# os.environ["PATH"] = os.environ["JAVA_HOME"] + "/bin:" + os.environ["PATH"]
-# from flask import Flask, request, jsonify
-# from sentence_transformers import CrossEncoder
-# import py_vncorenlp
-# app = Flask(__name__)
-# save_dir_vncore = "/home/user/app/vncorenlp"
-# rdrsegmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir=save_dir_vncore)
-# # Load your cross-encoder model
-# model_name = "truong1301/reranker_pho_BLAI"  # Replace with your actual model if different
-# cross_encoder = CrossEncoder(model_name, max_length=256, num_labels=1)
-# # Function to preprocess text with Vietnamese word segmentation
-# def preprocess_text(text):
-#     if not text:
-#         return text
-#     segmented_text = rdrsegmenter.word_segment(text)
-#     # Join tokenized sentences into a single string
-#     return " ".join([" ".join(sentence) for sentence in segmented_text])
-# @app.route("/rerank", methods=["POST"])
-# def rerank():
-#     try:
-#         # Get JSON data from the request (query and list of documents)
-#         data = request.get_json()
-#         query = data.get("query", "")
-#         documents = data.get("documents", [])
-#         if not query or not documents:
-#             return jsonify({"error": "Missing query or documents"}), 400
-#         # Preprocess query and documents with vncorenlp
-#         processed_query = preprocess_text(query)
-#         processed_docs = [preprocess_text(doc) for doc in documents]
-#         # Create pairs of query and documents for reranking
-#         query_doc_pairs = [(processed_query, doc) for doc in processed_docs]
-#         # Get reranking scores from the cross-encoder
-#         scores = cross_encoder.predict(query_doc_pairs).tolist()
-#         # Combine documents with their scores and sort
-#         ranked_results = sorted(
-#             [{"document": doc, "score": score} for doc, score in zip(documents, scores)],
-#             key=lambda x: x["score"],
-#             reverse=True
-#         )
-#         return jsonify({"results": ranked_results})
-#     except Exception as e:
-#         return jsonify({"error": str(e)}), 500
-# @app.route("/", methods=["GET"])
-# def health_check():
-#     return jsonify({"status": "Server is running"}), 200
-# if __name__ == "__main__":
-#     app.run(host="0.0.0.0", port=7860)  # Default port for Hugging Face Spaces
-import os
-import subprocess
-# Find libjvm.so
-output = subprocess.run("find /usr/lib/jvm -name libjvm.so", shell=True, capture_output=True, text=True)
-print("🔍 Searching for libjvm.so...")
-print(output.stdout or "❌ libjvm.so not found!")
-# Print JAVA_HOME and PATH
-print(f"JAVA_HOME: {os.environ.get('JAVA_HOME', 'Not Set')}")
-print(f"PATH: {os.environ.get('PATH', 'Not Set')}")

+from flask import Flask, request, jsonify
+from sentence_transformers import CrossEncoder
+app = Flask(__name__)
+# Load your cross-encoder model
+model_name = "truong1301/reranker_pho_BLAI"  # Replace with your actual model if different
+cross_encoder = CrossEncoder(model_name, max_length=256, num_labels=1)
+# Function to preprocess text with Vietnamese word segmentation
+def preprocess_text(text):
+    if not text:
+        return text
+    segmented_text = rdrsegmenter.word_segment(text)
+    # Join tokenized sentences into a single string
+    return " ".join([" ".join(sentence) for sentence in segmented_text])
+@app.route("/rerank", methods=["POST"])
+def rerank():
+    try:
+        # Get JSON data from the request (query and list of documents)
+        data = request.get_json()
+        query = data.get("query", "")
+        documents = data.get("documents", [])
+        if not query or not documents:
+            return jsonify({"error": "Missing query or documents"}), 400
+        # Create pairs of query and documents for reranking
+        query_doc_pairs = [(query, doc) for doc in documents]
+        # Get reranking scores from the cross-encoder
+        scores = cross_encoder.predict(query_doc_pairs).tolist()
+        # Combine documents with their scores and sort
+        ranked_results = sorted(
+            [{"document": doc, "score": score} for doc, score in zip(documents, scores)],
+            key=lambda x: x["score"],
+            reverse=True
+        )
+        return jsonify({"results": ranked_results})
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+@app.route("/", methods=["GET"])
+def health_check():
+    return jsonify({"status": "Server is running"}), 200
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)  # Default port for Hugging Face Spaces