Spaces:
Sleeping
Sleeping
File size: 6,797 Bytes
e34edc7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
#!/bin/bash
# End-to-end smoke test for the ingestion backend's upload API.
# Set DEBUG=1 and/or TRACE=1 in the environment for verbose curl output.
set -euo pipefail

printf '%s\n' \
  "🚀 Testing Ingestion Pipeline Upload" \
  "======================================"

# Configuration
BACKEND_URL="https://binkhoale1812-studdybuddy-ingestion3.hf.space"
USER_ID="44e65346-8eaa-4f95-b17a-f6219953e7a8"
# NOTE(review): the last group has 11 hex digits where a canonical UUID has
# 12 - confirm this id against the backend.
PROJECT_ID="496e2fad-ec7e-4562-b06a-ea2491f2460"

# Test files (paths are relative to the directory the script is run from)
FILE1="../exefiles/Lecture8_PSO_ACO.pdf"
FILE2="../exefiles/Tut8.pdf"

# Debug toggles: keep any value inherited from the environment, else 0
: "${DEBUG:=0}"
: "${TRACE:=0}"

cat <<EOF
📋 Configuration:
 Backend URL: $BACKEND_URL
 User ID: $USER_ID
 Project ID: $PROJECT_ID
 Files: $FILE1, $FILE2

EOF
#######################################
# Run curl with the transport options shared by every request in this script.
# Globals:
#   DEBUG (read) - "1" adds -v; treated as 0 when unset
#   TRACE (read) - "1" adds --trace-time --trace-ascii -; treated as 0 when unset
# Arguments:
#   $1 - HTTP method, passed to curl via -X
#   $2 - request URL
#   remaining arguments are forwarded to curl unchanged
# Outputs:
#   Whatever curl writes to stdout/stderr.
# Returns:
#   curl's exit status.
#######################################
curl_base() {
  local method="$1"; shift
  local url="$1"; shift
  local common=(
    -L --http1.1 --fail-with-body -sS
    --connect-timeout 60
    --retry 5 --retry-delay 4 --retry-connrefused
  )
  if [ "${DEBUG:-0}" = "1" ]; then
    common+=( -v )
  fi
  if [ "${TRACE:-0}" = "1" ]; then
    common+=( --trace-time --trace-ascii - )
  fi
  # Forward "$@" directly instead of copying it into an array: expanding an
  # empty array under `set -u` is an "unbound variable" error on bash < 4.4.
  curl -X "$method" "$url" "${common[@]}" "$@"
}
#######################################
# Issue a request through curl_base and append an "HTTP Status: <code>" line
# (via curl's -w write-out) after the response body.
# Arguments:
#   $1 - HTTP method
#   $2 - request URL
#   remaining arguments are forwarded to curl_base unchanged
# Outputs:
#   Response body followed by the status line, on stdout.
# Returns:
#   curl_base's exit status.
#######################################
json_with_status() {
  local method="$1"; shift
  local url="$1"; shift
  # "$@" is forwarded as-is: an intermediate array that happens to be empty
  # would be an "unbound variable" error under `set -u` on bash < 4.4.
  curl_base "$method" "$url" "$@" \
    -w "\nHTTP Status: %{http_code}\n"
}
# Step 0: Preflight (for browser parity)
printf '%s\n' \
  "🛰️ Step 0: OPTIONS /upload (preflight parity)" \
  "---------------------------------------------"
# Best-effort probe: a failing request must not stop the run.
if ! json_with_status OPTIONS "$BACKEND_URL/upload" \
  -H "Origin: https://example.com" \
  -H "Access-Control-Request-Method: POST"; then
  :
fi
printf '\n\n'

# Step 1: Health Check
printf '%s\n' \
  "🏥 Step 1: Health Check" \
  "------------------------"
# Also best-effort: the health result is shown but never gates later steps.
if ! json_with_status GET "$BACKEND_URL/health" \
  -H "Accept: application/json"; then
  :
fi
printf '\n\n'
# Step 2: Upload Files
echo "📁 Step 2: Upload Files (sequential)"
echo "------------------------------------"
echo "Uploading $(basename "$FILE1")..."

# Scratch files for the response headers and body; removed on every exit path.
UPLOAD_HEADERS=$(mktemp)
UPLOAD_BODY=$(mktemp)
trap 'rm -f -- "$UPLOAD_HEADERS" "$UPLOAD_BODY"' EXIT

# errexit is suspended so that curl's exit code and the HTTP status can both
# be reported below instead of the script dying inside the substitution.
set +e
HTTP_CODE=$(curl -L --http1.1 --fail-with-body -sS \
  --connect-timeout 60 --retry 3 --retry-delay 4 --retry-connrefused \
  -H "Expect:" \
  -X POST "$BACKEND_URL/upload" \
  -F "user_id=$USER_ID" \
  -F "project_id=$PROJECT_ID" \
  -F "files=@$FILE1" \
  -D "$UPLOAD_HEADERS" -o "$UPLOAD_BODY" \
  -w "%{http_code}")
RET=$?
set -e

echo "HTTP Status: $HTTP_CODE"
echo "--- Response Headers ---"; sed -e 's/\r$//' "$UPLOAD_HEADERS" | sed 's/^/ /'
echo "--- Response Body ---"; sed 's/^/ /' "$UPLOAD_BODY"
if [ "$RET" -ne 0 ] || [ "$HTTP_CODE" = "000" ]; then
  echo "❌ Upload failed (curl exit=$RET, http=$HTTP_CODE)"; exit 1
fi

# Extract job_id (prefer jq). A parse failure leaves JOB_ID empty so the
# explicit check below reports it, rather than errexit aborting silently.
JOB_ID=""
if command -v jq >/dev/null 2>&1; then
  JOB_ID=$(jq -r '.job_id // empty' < "$UPLOAD_BODY") || JOB_ID=""
else
  # The program is passed with -c so stdin stays free for the JSON body.
  # (With `python3 - <<HEREDOC` the here-doc itself is stdin, so the body
  # could never be read and the job id was always empty.)
  JOB_ID=$(python3 -c '
import json, sys
try:
    value = json.load(sys.stdin).get("job_id")
    print(value if value else "")
except Exception:
    print("")
' < "$UPLOAD_BODY") || JOB_ID=""
fi
if [ -z "${JOB_ID:-}" ]; then
  echo "❌ Failed to extract job_id from upload response"; exit 1
fi
echo ""
echo "✅ Upload 1 initiated successfully!"
echo " Job ID: $JOB_ID"
echo ""
# Step 3: Monitor Upload Progress
echo "📊 Step 3: Monitor Upload Progress"
echo "----------------------------------"
# Poll budget: up to 48 status checks with a 20 second pause between them.
MAX_ATTEMPTS=48
POLL_RESULT="timeout"
# Optional whitespace around the colon is accepted so that both compact and
# pretty-printed JSON match.
RE_COMPLETED='"status"[[:space:]]*:[[:space:]]*"completed"'
RE_PROCESSING='"status"[[:space:]]*:[[:space:]]*"processing"'
for ((i = 1; i <= MAX_ATTEMPTS; i++)); do
  echo "Checking progress (attempt $i/$MAX_ATTEMPTS)..."
  # Query once and reuse the response for both display and parsing, so the
  # text shown is the text that was evaluated. `|| true` keeps an HTTP error
  # from aborting under errexit; it falls through to the "failed" branch.
  STATUS_LINE=$(json_with_status GET "$BACKEND_URL/upload/status?job_id=$JOB_ID" -H "Accept: application/json") || true
  printf '%s\n' "$STATUS_LINE" | sed 's/^/ /'
  if [[ "$STATUS_LINE" =~ $RE_COMPLETED ]]; then
    echo "✅ Upload completed successfully!"; POLL_RESULT="completed"; break
  elif [[ "$STATUS_LINE" =~ $RE_PROCESSING ]]; then
    echo "⏳ Still processing... waiting 20 seconds"; sleep 20
  else
    echo "❌ Upload failed or unknown status"; POLL_RESULT="failed"; break
  fi
  echo ""
done
# Running out of attempts used to be silent; say so explicitly.
if [ "$POLL_RESULT" = "timeout" ]; then
  echo "⚠️ Gave up after $MAX_ATTEMPTS attempts; job $JOB_ID is still processing"
fi
echo ""
echo "📁 Step 3: Upload second file"
echo "------------------------------"
echo "Uploading $(basename "$FILE2")..."

# Scratch files for the second response. The EXIT trap lists the first
# upload's scratch files as well, because a new trap replaces any earlier one.
UPLOAD_HEADERS2=$(mktemp)
UPLOAD_BODY2=$(mktemp)
trap 'rm -f -- "$UPLOAD_HEADERS" "$UPLOAD_BODY" "$UPLOAD_HEADERS2" "$UPLOAD_BODY2"' EXIT

# errexit is suspended so that curl's exit code and the HTTP status can both
# be reported below instead of the script dying inside the substitution.
set +e
HTTP_CODE2=$(curl -L --http1.1 --fail-with-body -sS \
  --connect-timeout 60 --retry 3 --retry-delay 4 --retry-connrefused \
  -H "Expect:" \
  -X POST "$BACKEND_URL/upload" \
  -F "user_id=$USER_ID" \
  -F "project_id=$PROJECT_ID" \
  -F "files=@$FILE2" \
  -D "$UPLOAD_HEADERS2" -o "$UPLOAD_BODY2" \
  -w "%{http_code}")
RET2=$?
set -e

echo "HTTP Status: $HTTP_CODE2"
echo "--- Response Headers ---"; sed -e 's/\r$//' "$UPLOAD_HEADERS2" | sed 's/^/ /'
echo "--- Response Body ---"; sed 's/^/ /' "$UPLOAD_BODY2"
if [ "$RET2" -ne 0 ] || [ "$HTTP_CODE2" = "000" ]; then
  echo "❌ Upload 2 failed (curl exit=$RET2, http=$HTTP_CODE2)"; exit 1
fi

# Extract job_id (prefer jq). A parse failure leaves JOB_ID2 empty so the
# explicit check below reports it, rather than errexit aborting silently.
JOB_ID2=""
if command -v jq >/dev/null 2>&1; then
  JOB_ID2=$(jq -r '.job_id // empty' < "$UPLOAD_BODY2") || JOB_ID2=""
else
  # The program is passed with -c so stdin stays free for the JSON body.
  # (With `python3 - <<HEREDOC` the here-doc itself is stdin, so the body
  # could never be read and the job id was always empty.)
  JOB_ID2=$(python3 -c '
import json, sys
try:
    value = json.load(sys.stdin).get("job_id")
    print(value if value else "")
except Exception:
    print("")
' < "$UPLOAD_BODY2") || JOB_ID2=""
fi
if [ -z "${JOB_ID2:-}" ]; then
  echo "❌ Failed to extract job_id from second upload response"; exit 1
fi
echo ""
echo "✅ Upload 2 initiated successfully!"
echo " Job ID: $JOB_ID2"
echo ""
echo "📊 Step 4: Monitor Upload 2 Progress"
echo "-------------------------------------"
# Poll budget: up to 48 status checks with a 20 second pause between them.
MAX_ATTEMPTS=48
POLL_RESULT2="timeout"
# Optional whitespace around the colon is accepted so that both compact and
# pretty-printed JSON match.
RE_COMPLETED='"status"[[:space:]]*:[[:space:]]*"completed"'
RE_PROCESSING='"status"[[:space:]]*:[[:space:]]*"processing"'
for ((i = 1; i <= MAX_ATTEMPTS; i++)); do
  echo "Checking progress (attempt $i/$MAX_ATTEMPTS)..."
  # Query once and reuse the response for both display and parsing, so the
  # text shown is the text that was evaluated. `|| true` keeps an HTTP error
  # from aborting under errexit; it falls through to the "failed" branch.
  STATUS_LINE=$(json_with_status GET "$BACKEND_URL/upload/status?job_id=$JOB_ID2" -H "Accept: application/json") || true
  printf '%s\n' "$STATUS_LINE" | sed 's/^/ /'
  if [[ "$STATUS_LINE" =~ $RE_COMPLETED ]]; then
    echo "✅ Upload 2 completed successfully!"; POLL_RESULT2="completed"; break
  elif [[ "$STATUS_LINE" =~ $RE_PROCESSING ]]; then
    echo "⏳ Still processing... waiting 20 seconds"; sleep 20
  else
    echo "❌ Upload 2 failed or unknown status"; POLL_RESULT2="failed"; break
  fi
  echo ""
done
# Running out of attempts used to be silent; say so explicitly.
if [ "$POLL_RESULT2" = "timeout" ]; then
  echo "⚠️ Gave up after $MAX_ATTEMPTS attempts; job $JOB_ID2 is still processing"
fi
echo ""
# Step 4: List Uploaded Files
echo "📋 Step 4: List Uploaded Files"
echo "-------------------------------"
json_with_status GET "$BACKEND_URL/files?user_id=$USER_ID&project_id=$PROJECT_ID" -H "Accept: application/json" | sed 's/^/ /'
echo ""; echo ""

# The chunk queries use the base names of the files that were uploaded, so
# changing FILE1/FILE2 in the configuration keeps these steps in sync.
CHUNK_NAME1="${FILE1##*/}"
CHUNK_NAME2="${FILE2##*/}"

# Step 5: Get File Chunks (for the first uploaded file)
echo "🔍 Step 5: Get File Chunks for $CHUNK_NAME1"
echo "----------------------------------------------"
# -G turns the --data-urlencode pairs into a URL-encoded query string, which
# keeps the request valid for file names containing spaces or '&'.
json_with_status GET "$BACKEND_URL/files/chunks" -G \
  --data-urlencode "user_id=$USER_ID" \
  --data-urlencode "project_id=$PROJECT_ID" \
  --data-urlencode "filename=$CHUNK_NAME1" \
  --data-urlencode "limit=5" \
  -H "Accept: application/json" | sed 's/^/ /'
echo ""; echo ""

# Step 6: Get File Chunks (for the second uploaded file)
echo "🔍 Step 6: Get File Chunks for $CHUNK_NAME2"
echo "------------------------------------------------"
json_with_status GET "$BACKEND_URL/files/chunks" -G \
  --data-urlencode "user_id=$USER_ID" \
  --data-urlencode "project_id=$PROJECT_ID" \
  --data-urlencode "filename=$CHUNK_NAME2" \
  --data-urlencode "limit=5" \
  -H "Accept: application/json" | sed 's/^/ /'
echo ""
echo "🎉 Test completed!"
echo "=================="