Spaces:

shimaa22
/

testapi

Sleeping

App Files Files Community

shimaa22 committed on Jan 16

Commit

b8028fc

verified ·

1 Parent(s): c1821ba

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -20

app.py CHANGED Viewed

@@ -11,7 +11,8 @@ import uvicorn
 # ===== CONFIG =====
 INDEX_PATH = "faiss.index"
 META_PATH = "metadata.pkl.gz"
-MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"  # خفيف ومتاح للـ Free Space
 # ===== LOAD FAISS INDEX =====
 index = faiss.read_index(INDEX_PATH)
@@ -33,33 +34,50 @@ class Query(BaseModel):
     text: str
     k: int = 5  # أعلى 5 مشابهين افتراضي
 # ===== PREDICTION ROUTE =====
 @app.post("/predict")
 def predict(query: Query):
-    # ===== EMBEDDING =====
-    q_emb = model.encode([query.text]).astype("float32")
-    distances, indices = index.search(q_emb, query.k)
-    top_statuses = []
-    results = []
-    for rank, idx in enumerate(indices[0]):
-        status = statuses[idx]
-        top_statuses.append(status)
-        results.append({
-            "rank": rank + 1,
-            "text": texts[idx],
-            "status": status,
-            "distance": float(distances[0][rank])
-        })
     # ===== VOTING =====
-    vote = Counter(top_statuses).most_common(1)[0]
     return {
         "prediction": vote[0],
-        "votes": dict(Counter(top_statuses)),
-        "top_k": results
     }
 # ===== RUN IF MAIN =====

 # ===== CONFIG =====
 INDEX_PATH = "faiss.index"
 META_PATH = "metadata.pkl.gz"
+MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+CHUNK_SIZE = 2000  # length of each chunk, in characters
 # ===== LOAD FAISS INDEX =====
 index = faiss.read_index(INDEX_PATH)
     text: str
     k: int = 5  # أعلى 5 مشابهين افتراضي
+# ===== HELPER: split long text into chunks =====
+def split_text(text, chunk_size=CHUNK_SIZE):
+    chunks = []
+    for i in range(0, len(text), chunk_size):
+        chunks.append(text[i:i+chunk_size])
+    return chunks
 # ===== PREDICTION ROUTE =====
 @app.post("/predict")
 def predict(query: Query):
+    text_chunks = split_text(query.text)
+    all_top_statuses = []
+    all_results = []
+    for chunk in text_chunks:
+        # Escape backslashes
+        chunk = chunk.replace("\\", "\\\\")
+        # ===== EMBEDDING =====
+        q_emb = model.encode([chunk]).astype("float32")
+        distances, indices = index.search(q_emb, query.k)
+        top_statuses = []
+        results = []
+        for rank, idx in enumerate(indices[0]):
+            status = statuses[idx]
+            top_statuses.append(status)
+            results.append({
+                "rank": rank + 1,
+                "text": texts[idx],
+                "status": status,
+                "distance": float(distances[0][rank])
+            })
+        all_top_statuses.extend(top_statuses)
+        all_results.extend(results)
     # ===== VOTING =====
+    vote = Counter(all_top_statuses).most_common(1)[0]
     return {
         "prediction": vote[0],
+        "votes": dict(Counter(all_top_statuses)),
+        "top_k": all_results[:query.k]  # أعلى k من كل النتائج
     }
 # ===== RUN IF MAIN =====