shimaa22 commited on
Commit
b8028fc
·
verified ·
1 Parent(s): c1821ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -20
app.py CHANGED
@@ -11,7 +11,8 @@ import uvicorn
11
  # ===== CONFIG =====
12
  INDEX_PATH = "faiss.index"
13
  META_PATH = "metadata.pkl.gz"
14
- MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2" # خفيف ومتاح للـ Free Space
 
15
 
16
  # ===== LOAD FAISS INDEX =====
17
  index = faiss.read_index(INDEX_PATH)
@@ -33,33 +34,50 @@ class Query(BaseModel):
33
  text: str
34
  k: int = 5 # أعلى 5 مشابهين افتراضي
35
 
 
 
 
 
 
 
 
36
  # ===== PREDICTION ROUTE =====
37
  @app.post("/predict")
38
  def predict(query: Query):
39
- # ===== EMBEDDING =====
40
- q_emb = model.encode([query.text]).astype("float32")
41
- distances, indices = index.search(q_emb, query.k)
42
-
43
- top_statuses = []
44
- results = []
45
-
46
- for rank, idx in enumerate(indices[0]):
47
- status = statuses[idx]
48
- top_statuses.append(status)
49
- results.append({
50
- "rank": rank + 1,
51
- "text": texts[idx],
52
- "status": status,
53
- "distance": float(distances[0][rank])
54
- })
 
 
 
 
 
 
 
 
 
 
55
 
56
  # ===== VOTING =====
57
- vote = Counter(top_statuses).most_common(1)[0]
58
 
59
  return {
60
  "prediction": vote[0],
61
- "votes": dict(Counter(top_statuses)),
62
- "top_k": results
63
  }
64
 
65
  # ===== RUN IF MAIN =====
 
11
  # ===== CONFIG =====
12
  INDEX_PATH = "faiss.index"
13
  META_PATH = "metadata.pkl.gz"
14
+ MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
15
+ CHUNK_SIZE = 2000 # طول كل chunk بال characters
16
 
17
  # ===== LOAD FAISS INDEX =====
18
  index = faiss.read_index(INDEX_PATH)
 
34
  text: str
35
  k: int = 5 # أعلى 5 مشابهين افتراضي
36
 
37
# ===== HELPER: split long text into chunks =====
def split_text(text, chunk_size=CHUNK_SIZE):
    """Split *text* into consecutive chunks of at most *chunk_size* characters.

    Returns a list of substrings covering the whole input in order; an
    empty input yields an empty list.
    """
    return [text[start:start + chunk_size]
            for start in range(0, len(text), chunk_size)]
43
+
44
# ===== PREDICTION ROUTE =====
@app.post("/predict")
def predict(query: Query):
    """Classify *query.text* by majority vote over its FAISS nearest neighbours.

    The text is split into fixed-size chunks; each chunk is embedded and
    searched against the index for query.k neighbours, and all retrieved
    neighbour statuses are pooled into a single vote.

    Returns a dict with:
      - "prediction": the most common neighbour status (None if the input
        produced no chunks, e.g. empty text)
      - "votes": per-status neighbour counts
      - "top_k": the query.k globally closest neighbours across all chunks,
        re-ranked by ascending distance
    """
    text_chunks = split_text(query.text)

    all_top_statuses = []
    all_results = []

    for chunk in text_chunks:
        # NOTE(review): escaping backslashes alters the text that gets
        # embedded — presumably a serialization workaround; confirm it is
        # intentional, since it changes the embedding of the raw query.
        chunk = chunk.replace("\\", "\\\\")

        # ===== EMBEDDING =====
        q_emb = model.encode([chunk]).astype("float32")
        distances, indices = index.search(q_emb, query.k)

        for rank, idx in enumerate(indices[0]):
            status = statuses[idx]
            all_top_statuses.append(status)
            all_results.append({
                "rank": rank + 1,
                "text": texts[idx],
                "status": status,
                "distance": float(distances[0][rank]),
            })

    # Guard: empty input text produces no chunks and no votes; previously
    # Counter(...).most_common(1)[0] raised IndexError here.
    if not all_top_statuses:
        return {"prediction": None, "votes": {}, "top_k": []}

    # ===== VOTING =====
    vote = Counter(all_top_statuses).most_common(1)[0]

    # Bug fix: the previous code returned the FIRST query.k results in
    # chunk order; the true top-k are the globally smallest distances.
    # Re-rank them so "rank" is meaningful across chunks.
    top_k = sorted(all_results, key=lambda r: r["distance"])[:query.k]
    for new_rank, result in enumerate(top_k, start=1):
        result["rank"] = new_rank

    return {
        "prediction": vote[0],
        "votes": dict(Counter(all_top_statuses)),
        "top_k": top_k,
    }
82
 
83
  # ===== RUN IF MAIN =====