File size: 3,177 Bytes
9ef8237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import streamlit as st
from backend import Database, Context, QuestionGeneration, AnswerGeneration, create_csv


# Page header: a narrow left column for the logo and a wider right column
# (1:3 width ratio) for the title and subtitle.
logo_col, title_col = st.columns([1, 3])

logo_col.image("logo.jpeg", width=150)

# Raw HTML is used so the headings can carry inline alignment/margin styles.
title_col.markdown(
    "<h1 style='text-align: left; margin-top: -10px;'>Synthetic Data Generator</h1>",
    unsafe_allow_html=True,
)
title_col.markdown(
    "<h2 style='text-align: center;'>Your Reliable Synthetic Dataset Generation</h2>",
    unsafe_allow_html=True,
)

# User inputs. These names are read by the generation step further down.

# One or more PDF uploads; with accept_multiple_files=True this holds a
# collection of uploaded files (empty until something is uploaded).
file = st.file_uploader(
    "Choose PDF Files",
    accept_multiple_files=True,
    type="pdf",
)

# Topic name entered by the user.
topic = st.text_input(
    "Topic Name",
    placeholder="Enter the topic name",
)

# Requested number of questions, constrained to 5..100 (default 8).
num_questions = st.number_input(
    "Number of Questions",
    min_value=5,
    max_value=100,
    value=8,
)

# Desired answer length; "Short" (index 1) is preselected.
answer_type = st.radio(
    "Answer Type",
    options=["One-word", "Short", "Long"],
    index=1,
    horizontal=True,
)

# Optional free-form rules forwarded to question and answer generation.
custom_conditions = st.text_area(
    "Custom Conditions",
    placeholder="Enter any custom rules for the LLM...",
)

# Generate button: validate the inputs, then run the full pipeline
# (store PDFs -> retrieve context -> generate questions -> generate answers
# -> build CSV -> offer download).
#
# NOTE(review): st.button is True only on the script run triggered by the
# click. Clicking the download button below triggers another rerun, on which
# this branch is skipped and the preview disappears. Persisting the results
# in st.session_state would avoid that - left as a follow-up.
if st.button("Generate"):
    # Treat a whitespace-only topic as missing.
    topic_name = topic.strip()

    if not file:
        # Previously a click without uploads was silently ignored.
        st.warning("Please upload at least one PDF file.")
    elif not topic_name:
        st.warning("Please enter a topic name.")
    else:
        # Hand the uploaded PDFs to the backend and persist them.
        with st.spinner("Storing in Database..."):
            db = Database(file)
            db.store()

        # Retrieve context for the topic: the backend first rewrites the
        # topic into a clarified query, then uses it for retrieval.
        with st.spinner("Retrieving Contexts..."):
            context_obj = Context(topic_name)
            clarified_query = context_obj.redefine()
            context_content = context_obj.retrieve_faiss(clarified_query)

        # Generate questions from the retrieved context.
        with st.spinner("Generating Questions..."):
            question_gen = QuestionGeneration(
                context=context_content,
                num_questions=num_questions,
                question_type=answer_type,
                conditions=custom_conditions,
            )
            total_questions, questions = question_gen.generate()

        # Display the total number of questions generated.
        st.write(f"Total {total_questions} Questions Generated")

        with st.spinner("Generating Answer"):
            # CSS override: make the progress bar red and 25px tall.
            st.markdown("""
                    <style>
                    .stProgress > div > div > div > div {
                        background-color: red; /* Change the color to red */
                        height: 25px; /* Increase this value to make it larger */
                    }
                    </style>
                    """, unsafe_allow_html=True)

            # The progress bar and text placeholder are handed to the answer
            # generator, presumably so it can report progress - confirm in
            # backend.
            progress_bar = st.progress(0)
            percentage_text = st.empty()

            # Generate answers for the questions.
            answer_gen = AnswerGeneration(
                context_content,
                questions,
                answer_type,
                custom_conditions,
                percentage_text=percentage_text,
                progress_bar=progress_bar,
            )
            answers = answer_gen.generate()

        st.success("Answers Generated")

        # Save questions and answers to CSV and preview the resulting frame.
        csv_file_path, df = create_csv(questions, answers, topic_name)
        st.write("Preview of Data")
        st.dataframe(df)

        # CSV download button. The handle is named csv_file so it does not
        # rebind the uploader variable `file`.
        with open(csv_file_path, "rb") as csv_file:
            st.write("Click the below button to download your CSV file.")
            st.download_button(
                label="Download as CSV",
                data=csv_file,
                file_name=f"{topic_name}_questions_answers.csv",
                mime="text/csv",
            )