File size: 6,587 Bytes
cba2c8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
import streamlit as st
from pathlib import Path
import requests

# Base URL of the backend API that every request in this script targets.
API_URL = "http://localhost:8000"

# ----------------------------------
# Local folders (UI side only)
# ----------------------------------
# Anchored at the process's current working directory, not at this file.
BASE_DIR = Path.cwd()
QUERY_DIR = BASE_DIR.joinpath("queries")
QUERY_DIR.mkdir(parents=True, exist_ok=True)

# ----------------------------------
# Session state bootstrap
# ----------------------------------
# Each slot is created only when it is missing, so values already held in
# st.session_state are never overwritten:
#   saved_docs -> list of PDF paths returned by the backend
#   saved_once -> set of upload keys used to skip duplicate uploads
for _slot, _factory in (("saved_docs", list), ("saved_once", set)):
    if _slot not in st.session_state:
        st.session_state[_slot] = _factory()

# Module-level alias: appending to it mutates the list held in session state.
saved_docs = st.session_state["saved_docs"]

# ----------------------------------
# UI
# ----------------------------------
st.set_page_config(page_title="Exam Pipeline", layout="wide")
st.title("📘 Exam Question Processing Pipeline")

# Ask the user for the key instead of shipping a placeholder in the source:
# a hard-coded non-empty string made the later "API key required" check
# unreachable and sent a bogus key to the backend. The field is masked and
# surrounding whitespace from copy/paste is dropped.
api_key = st.text_input(
    "Groq API key",
    type="password",
    key="groq_api_key",
    help="Sent to the backend when the pipeline is started.",
).strip()

# min_value and step are both ints, so the widget yields an int that can be
# passed straight to range().
num_docs = st.number_input(
    "Number of documents",
    min_value=1,
    step=1,
)

# ----------------------------------
# Upload Section
# ----------------------------------
# (connect, read) timeouts in seconds so an unreachable backend cannot hang
# the UI indefinitely.
UPLOAD_TIMEOUT_S = (5, 120)


def _store_document(endpoint, files, dedupe_key, duplicate_msg):
    """Upload ``files`` to the backend once and record the PDF path it returns.

    Args:
        endpoint: Route name appended to ``API_URL`` (e.g. ``"save-pdf"``).
        files: Value forwarded as the ``files=`` argument of ``requests.post``.
        dedupe_key: Identifier of this upload; if it is already present in
            ``st.session_state.saved_once`` nothing is sent.
        duplicate_msg: Info message displayed when the upload is skipped.

    On success the returned path is appended to ``saved_docs`` and the key is
    remembered. On any failure an error is displayed and the current script
    run is halted with ``st.stop()``.
    """
    if dedupe_key in st.session_state.saved_once:
        st.info(duplicate_msg)
        return

    try:
        res = requests.post(
            f"{API_URL}/{endpoint}",
            files=files,
            timeout=UPLOAD_TIMEOUT_S,
        )
    except requests.RequestException as exc:
        st.error(f"Could not reach the API at {API_URL}: {exc}")
        st.stop()

    if res.status_code != 200:
        st.error(res.text)
        st.stop()

    try:
        data = res.json()
    except ValueError:
        # Raised by requests when the body is not valid JSON.
        st.error("API returned a response that is not valid JSON")
        st.stop()

    if not isinstance(data, dict):
        st.error("API returned an unexpected response")
        st.stop()

    if "error" in data:
        st.error(data["error"])
        st.stop()

    pdf_path = data.get("path")
    if not pdf_path:
        st.error("API response did not include a saved file path")
        st.stop()

    saved_docs.append(pdf_path)
    st.session_state.saved_once.add(dedupe_key)
    st.success(f"Saved → {pdf_path}")


for i in range(num_docs):
    st.subheader(f"Document {i+1}")

    doc_type = st.radio(
        "Input type",
        ["Images", "PDF"],
        key=f"type_{i}"
    )

    # -------- Images --------
    if doc_type == "Images":
        uploaded_images = st.file_uploader(
            "Upload images",
            type=["png", "jpg", "jpeg"],
            accept_multiple_files=True,
            key=f"img_{i}"
        )

        # The button is only rendered once at least one image is selected.
        if uploaded_images and st.button(f"Save Images as PDF (Doc {i+1})"):
            # Name AND size go into the key so that a different file that
            # happens to reuse a name is not mistaken for a duplicate.
            fingerprint = ",".join(
                f"{img.name}:{img.size}" for img in uploaded_images
            )
            _store_document(
                "images-to-pdf",
                [
                    ("files", (img.name, img.getvalue(), img.type))
                    for img in uploaded_images
                ],
                f"img_{i}_{fingerprint}",
                "Images already saved for this document.",
            )

    # -------- PDF --------
    else:
        uploaded_pdf = st.file_uploader(
            "Upload PDF",
            type=["pdf"],
            key=f"pdf_{i}"
        )

        # PDFs are sent as soon as they are selected; the dedupe key keeps
        # later script reruns from uploading the same file again.
        if uploaded_pdf:
            _store_document(
                "save-pdf",
                {
                    "file": (
                        uploaded_pdf.name,
                        uploaded_pdf.getvalue(),
                        "application/pdf",
                    )
                },
                f"pdf_{i}_{uploaded_pdf.name}:{uploaded_pdf.size}",
                "PDF already saved for this document.",
            )

# ----------------------------------
# Run Pipeline
# ----------------------------------
import time

# (connect, read) timeouts in seconds applied to every pipeline API call.
PIPELINE_TIMEOUT_S = (5, 60)
# Pause between two consecutive status polls, in seconds.
POLL_INTERVAL_S = 1
# Upper bound on how long a single job is polled before giving up.
MAX_POLL_SECONDS = 60 * 60


def _call_api(send, route, **kwargs):
    """Call the backend and return its decoded JSON object.

    Args:
        send: The ``requests`` function to use (``requests.get`` or
            ``requests.post``).
        route: Path appended to ``API_URL``, starting with ``/``.
        **kwargs: Extra keyword arguments forwarded to ``send``.

    Returns:
        The response body decoded as a ``dict``.

    A connection failure, a non-200 status, or a body that is not a JSON
    object is reported with ``st.error`` and halts the script run via
    ``st.stop()``.
    """
    try:
        res = send(f"{API_URL}{route}", timeout=PIPELINE_TIMEOUT_S, **kwargs)
    except requests.RequestException as exc:
        st.error(f"❌ Could not reach the API at {API_URL}: {exc}")
        st.stop()

    if res.status_code != 200:
        st.error(res.text)
        st.stop()

    try:
        payload = res.json()
    except ValueError:
        # Raised by requests when the body is not valid JSON.
        payload = None

    if not isinstance(payload, dict):
        st.error("❌ API returned an unexpected response")
        st.stop()

    return payload


if st.button("🚀 Run Full Pipeline"):

    if not api_key:
        st.error("❌ API key required")
        st.stop()

    if not saved_docs:
        st.error("❌ Please upload images or PDFs first")
        st.stop()

    # A new run invalidates whatever the previous run produced.
    if "pipeline_result" in st.session_state:
        del st.session_state["pipeline_result"]

    # ---------------- START PIPELINE ----------------
    # NOTE(review): the key travels in the query string, as the existing
    # endpoint expects; consider a header if the backend can accept one.
    start_data = _call_api(
        requests.post,
        "/run-pipeline",
        params={"api_key": api_key},
        json=saved_docs,
    )

    job_id = start_data.get("job_id")
    if not job_id:
        st.error("❌ Failed to start pipeline")
        st.stop()

    # ---------------- STATUS POLLING ----------------
    st.subheader("🚀 Pipeline Progress")
    status_box = st.empty()  # single placeholder, overwritten on each poll

    final_result = None
    deadline = time.monotonic() + MAX_POLL_SECONDS

    while True:
        status_data = _call_api(requests.get, f"/job-status/{job_id}")
        status_text = str(status_data.get("status", "Unknown status"))

        status_box.info(status_text)

        # The backend reports failures as a status starting with the cross mark.
        if status_text.startswith("❌"):
            st.error(status_text)
            st.stop()

        if status_text == "✅ Completed":
            final_result = status_data.get("result")
            break

        # Without a cap, a job that never finishes would poll forever.
        if time.monotonic() >= deadline:
            st.error("❌ Timed out waiting for the pipeline to finish")
            st.stop()

        time.sleep(POLL_INTERVAL_S)

    if not final_result:
        st.error("Pipeline finished but no result returned")
        st.stop()

    # Keep the result across reruns: clicking a download button reruns the
    # script with the run button reported as not pressed, so anything drawn
    # only inside this branch would vanish after the first download.
    st.session_state["pipeline_result"] = final_result

# ---------------- FINAL OUTPUT ----------------
pipeline_result = st.session_state.get("pipeline_result")
if pipeline_result:
    st.success("✅ Pipeline completed successfully!")

    # (button label, key in the result, download file name, MIME type)
    downloads = (
        (
            "📥 Download Final PDF",
            "final_pdf",
            "Exam_Frequency_Report.pdf",
            "application/pdf",
        ),
        (
            "📥 Download Frequency JSON",
            "frequency_json",
            "output_frequency.json",
            "application/json",
        ),
    )

    for label, result_key, download_name, mime in downloads:
        # The backend returns file paths, which are read from local disk.
        try:
            file_bytes = Path(pipeline_result[result_key]).read_bytes()
        except (KeyError, TypeError, OSError) as exc:
            st.error(f"❌ Could not load {download_name}: {exc!r}")
            continue

        st.download_button(
            label,
            file_bytes,
            file_name=download_name,
            mime=mime,
        )

# ----------------------------------
# Debug view (UNCHANGED)
# ----------------------------------
# Every PDF currently present in the local queries/ folder, regardless of
# which session created it. Sorted because glob() yields entries in
# arbitrary order, which would make the list jump around between reruns.
st.subheader("📂 PDFs available in queries/")
st.write(sorted(str(pdf) for pdf in QUERY_DIR.glob("*.pdf")))

# Paths returned by the backend for uploads made during this session.
st.subheader("📂 Recently Uploaded PDFs (This Session Only)")
if saved_docs:
    st.write(saved_docs)
else:
    st.info("No documents uploaded in this session.")