File size: 6,797 Bytes
e34edc7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/bin/bash
# Smoke test for the StudyBuddy ingestion pipeline: uploads two PDFs,
# polls the job-status endpoint until completion, then lists the stored
# files and their chunks. Intended to be run manually; output is for humans.

set -euo pipefail

echo "🚀 Testing Ingestion Pipeline Upload"
echo "======================================"

# Configuration
# Backend is a Hugging Face Space; USER_ID/PROJECT_ID are fixed test ids.
BACKEND_URL="https://binkhoale1812-studdybuddy-ingestion3.hf.space"
USER_ID="44e65346-8eaa-4f95-b17a-f6219953e7a8"
# NOTE(review): the last group of this id has 11 hex chars ("ea2491f2460");
# a canonical UUID needs 12 — confirm this is the id the backend expects.
PROJECT_ID="496e2fad-ec7e-4562-b06a-ea2491f2460"

# Test files
# Paths are relative to the directory the script is run from — TODO confirm
# the expected working directory (../exefiles must exist from there).
FILE1="../exefiles/Lecture8_PSO_ACO.pdf"
FILE2="../exefiles/Tut8.pdf"

# Debug toggles
# DEBUG=1 adds curl -v; TRACE=1 adds --trace-time/--trace-ascii (see curl_base).
DEBUG=${DEBUG:-0}
TRACE=${TRACE:-0}

echo "📋 Configuration:"
echo "   Backend URL: $BACKEND_URL"
echo "   User ID: $USER_ID"
echo "   Project ID: $PROJECT_ID"
echo "   Files: $FILE1, $FILE2"
echo ""

#######################################
# Thin curl wrapper applying the script-wide defaults: follow redirects,
# force HTTP/1.1, fail-with-body, quiet-but-show-errors, and retries.
# Globals:   DEBUG (read) - "1" adds -v
#            TRACE (read) - "1" adds --trace-time --trace-ascii -
# Arguments: $1 - HTTP method, $2 - URL, remaining - extra curl args
# Outputs:   curl response on stdout
#######################################
curl_base() {
  local http_method=$1
  local target_url=$2
  shift 2

  local -a opts=(
    -L --http1.1 --fail-with-body -sS
    --connect-timeout 60
    --retry 5 --retry-delay 4 --retry-connrefused
  )
  if [ "$DEBUG" = "1" ]; then
    opts+=( -v )
  fi
  if [ "$TRACE" = "1" ]; then
    opts+=( --trace-time --trace-ascii - )
  fi

  curl -X "$http_method" "$target_url" "${opts[@]}" "$@"
}

# Issue a request through curl_base and append a trailing
# "HTTP Status: NNN" line (curl -w) so callers can log body and
# status code together.
json_with_status() {
  local verb=$1
  local endpoint=$2
  shift 2
  curl_base "$verb" "$endpoint" "$@" -w "\nHTTP Status: %{http_code}\n"
}

# Step 0: Preflight (for browser parity)
# Mirrors the CORS preflight a browser would send, so server-side CORS
# misconfiguration shows up before the real POST; '|| true' keeps the
# script going even if the endpoint rejects OPTIONS.
echo "🛰️  Step 0: OPTIONS /upload (preflight parity)"
echo "---------------------------------------------"
json_with_status OPTIONS "$BACKEND_URL/upload" -H "Origin: https://example.com" -H "Access-Control-Request-Method: POST" || true
echo ""; echo ""

# Step 1: Health Check
# Non-fatal liveness probe; failures are printed but do not abort the run.
echo "🏥 Step 1: Health Check"
echo "------------------------"
json_with_status GET "$BACKEND_URL/health" -H "Accept: application/json" || true
echo ""; echo ""

# Step 2: Upload Files
echo "📁 Step 2: Upload Files (sequential)"
echo "------------------------------------"
echo "Uploading $(basename "$FILE1")..."

# Capture response headers and body in separate temp files so both can be
# echoed verbatim below. NOTE(review): these temp files are never removed;
# consider a 'trap cleanup EXIT' if this script runs unattended.
UPLOAD_HEADERS=$(mktemp)
UPLOAD_BODY=$(mktemp)

# Run the upload outside 'set -e' so curl's exit code and the HTTP status
# can be reported explicitly instead of the script dying silently.
set +e
HTTP_CODE=$(curl -L --http1.1 --fail-with-body -sS \
  --connect-timeout 60 --retry 3 --retry-delay 4 --retry-connrefused \
  -H "Expect:" \
  -X POST "$BACKEND_URL/upload" \
  -F "user_id=$USER_ID" \
  -F "project_id=$PROJECT_ID" \
  -F "files=@$FILE1" \
  -D "$UPLOAD_HEADERS" -o "$UPLOAD_BODY" \
  -w "%{http_code}")
RET=$?
set -e

echo "HTTP Status: $HTTP_CODE"
echo "--- Response Headers ---"; sed -e 's/\r$//' "$UPLOAD_HEADERS" | sed 's/^/  /'
echo "--- Response Body ---"; sed 's/^/  /' "$UPLOAD_BODY"

if [ "$RET" -ne 0 ] || [ "$HTTP_CODE" = "000" ]; then
  echo "❌ Upload failed (curl exit=$RET, http=$HTTP_CODE)"; exit 1
fi

# Extract job_id (prefer jq; fall back to python3).
# BUG FIX: the old fallback piped the python script itself to 'python3 -'
# via a here-doc, so stdin was already consumed and json.load() could never
# see the response body (the stranded '< "$UPLOAD_BODY"' line was a
# command-less redirect). JOB_ID was therefore always empty without jq.
if command -v jq >/dev/null 2>&1; then
  JOB_ID=$(jq -r '.job_id // empty' < "$UPLOAD_BODY")
else
  JOB_ID=$(python3 -c 'import sys, json
try:
    print(json.load(sys.stdin).get("job_id", ""))
except Exception:
    print("")' < "$UPLOAD_BODY")
fi

if [ -z "${JOB_ID:-}" ]; then
  echo "❌ Failed to extract job_id from upload response"; exit 1
fi

echo ""
echo "✅ Upload 1 initiated successfully!"
echo "   Job ID: $JOB_ID"
echo ""

# Step 3: Monitor Upload Progress
echo "📊 Step 3: Monitor Upload Progress"
echo "----------------------------------"

# Poll the status endpoint up to 48 times (~16 min worst case at 20 s per
# wait). BUG FIX: the status used to be fetched TWICE per iteration — once
# for display and once (through a no-op 'tail -n +1') for the check — so
# the printed payload could disagree with the tested one, and each loop
# pass cost two requests. Fetch once and reuse. The attempt label also
# said "/12" while the loop runs 48 times.
for i in {1..48}; do
  echo "Checking progress (attempt $i/48)..."
  STATUS_LINE=$(json_with_status GET "$BACKEND_URL/upload/status?job_id=$JOB_ID" -H "Accept: application/json")
  printf '%s\n' "$STATUS_LINE" | sed 's/^/  /'
  if echo "$STATUS_LINE" | grep -q '"status":"completed"'; then
    echo "✅ Upload completed successfully!"; break
  elif echo "$STATUS_LINE" | grep -q '"status":"processing"'; then
    echo "⏳ Still processing... waiting 20 seconds"; sleep 20
  else
    echo "❌ Upload failed or unknown status"; break
  fi
  echo ""
done

echo ""

# Upload the second file, mirroring the first upload exactly.
# NOTE(review): this banner reuses the "Step 3" label already used by the
# progress monitor above — renumber if the output is ever parsed.
echo "📁 Step 3: Upload second file"
echo "------------------------------"
echo "Uploading $(basename "$FILE2")..."

# Separate temp files for headers/body (never cleaned up; see Step 2 note).
UPLOAD_HEADERS2=$(mktemp)
UPLOAD_BODY2=$(mktemp)

# Outside 'set -e' so failures are reported with exit code + HTTP status.
set +e
HTTP_CODE2=$(curl -L --http1.1 --fail-with-body -sS \
  --connect-timeout 60 --retry 3 --retry-delay 4 --retry-connrefused \
  -H "Expect:" \
  -X POST "$BACKEND_URL/upload" \
  -F "user_id=$USER_ID" \
  -F "project_id=$PROJECT_ID" \
  -F "files=@$FILE2" \
  -D "$UPLOAD_HEADERS2" -o "$UPLOAD_BODY2" \
  -w "%{http_code}")
RET2=$?
set -e

echo "HTTP Status: $HTTP_CODE2"
echo "--- Response Headers ---"; sed -e 's/\r$//' "$UPLOAD_HEADERS2" | sed 's/^/  /'
echo "--- Response Body ---"; sed 's/^/  /' "$UPLOAD_BODY2"

if [ "$RET2" -ne 0 ] || [ "$HTTP_CODE2" = "000" ]; then
  echo "❌ Upload 2 failed (curl exit=$RET2, http=$HTTP_CODE2)"; exit 1
fi

# BUG FIX: same broken here-doc fallback as upload 1 — 'python3 -' consumed
# stdin for the script itself, so the response body was never read and
# JOB_ID2 was always empty without jq. Use -c with an input redirect.
if command -v jq >/dev/null 2>&1; then
  JOB_ID2=$(jq -r '.job_id // empty' < "$UPLOAD_BODY2")
else
  JOB_ID2=$(python3 -c 'import sys, json
try:
    print(json.load(sys.stdin).get("job_id", ""))
except Exception:
    print("")' < "$UPLOAD_BODY2")
fi

if [ -z "${JOB_ID2:-}" ]; then
  echo "❌ Failed to extract job_id from second upload response"; exit 1
fi

echo ""
echo "✅ Upload 2 initiated successfully!"
echo "   Job ID: $JOB_ID2"
echo ""

# Poll job 2's status, same shape as the first monitor loop.
# BUG FIX: removed the duplicate per-iteration request (and its no-op
# 'tail -n +1') — fetch the status once and reuse it for both display
# and the completed/processing check.
echo "📊 Step 4: Monitor Upload 2 Progress"
echo "-------------------------------------"
for i in {1..48}; do
  echo "Checking progress (attempt $i/48)..."
  STATUS_LINE=$(json_with_status GET "$BACKEND_URL/upload/status?job_id=$JOB_ID2" -H "Accept: application/json")
  printf '%s\n' "$STATUS_LINE" | sed 's/^/  /'
  if echo "$STATUS_LINE" | grep -q '"status":"completed"'; then
    echo "✅ Upload 2 completed successfully!"; break
  elif echo "$STATUS_LINE" | grep -q '"status":"processing"'; then
    echo "⏳ Still processing... waiting 20 seconds"; sleep 20
  else
    echo "❌ Upload 2 failed or unknown status"; break
  fi
  echo ""
done

echo ""

# Step 4: List Uploaded Files
# NOTE(review): step labels are duplicated across the script (two "Step 3"
# and two "Step 4" headings) — renumber if output is ever machine-parsed.
echo "📋 Step 4: List Uploaded Files"
echo "-------------------------------"
json_with_status GET "$BACKEND_URL/files?user_id=$USER_ID&project_id=$PROJECT_ID" -H "Accept: application/json" | sed 's/^/  /'
echo ""; echo ""

# Step 5: Get File Chunks (for Lecture8_PSO_ACO.pdf)
# Spot-checks that chunking produced rows for the first file (limit=5).
echo "🔍 Step 5: Get File Chunks for Lecture8_PSO_ACO.pdf"
echo "----------------------------------------------"
json_with_status GET "$BACKEND_URL/files/chunks?user_id=$USER_ID&project_id=$PROJECT_ID&filename=Lecture8_PSO_ACO.pdf&limit=5" -H "Accept: application/json" | sed 's/^/  /'
echo ""; echo ""

# Step 6: Get File Chunks (for Tut8.pdf)
# Same spot-check for the second file.
echo "🔍 Step 6: Get File Chunks for Tut8.pdf"
echo "------------------------------------------------"
json_with_status GET "$BACKEND_URL/files/chunks?user_id=$USER_ID&project_id=$PROJECT_ID&filename=Tut8.pdf&limit=5" -H "Accept: application/json" | sed 's/^/  /'

echo ""
echo "🎉 Test completed!"
echo "=================="