import os

import pandas as pd
import PyPDF2
import streamlit as st


def extract_pdf_content(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: The PDF source, handed unchanged to ``PyPDF2.PdfReader``
            (``app`` passes the Streamlit upload object).

    Returns:
        A single string holding each page's extracted text back to back,
        with no separator inserted between pages. Empty when the document
        has no pages or no extractable text.
    """
    reader = PyPDF2.PdfReader(file)
    # ``or ""`` is a defensive guard: a page that yields no text must not
    # break the join.
    return "".join(page.extract_text() or "" for page in reader.pages)


def generate_qa(content, topic, num_questions, answer_type, custom_conditions):
    """Build placeholder question/answer pairs for a topic.

    This is a stub generator: ``content`` and ``custom_conditions`` are
    accepted but not used when producing the output.

    Args:
        content: Source text (currently unused).
        topic: Topic name embedded in every question.
        num_questions: How many pairs to produce; values below 1 yield
            empty lists.
        answer_type: Answer style label; it is lower-cased and embedded in
            every answer.
        custom_conditions: Extra generation rules (currently unused).

    Returns:
        A ``(questions, answers)`` tuple of two equally long lists of
        strings, numbered from 1.
    """
    numbers = range(1, num_questions + 1)
    questions = [f"Sample question {n} about {topic}" for n in numbers]
    answers = [
        f"Sample {answer_type.lower()} answer for question {n}" for n in numbers
    ]
    return questions, answers


def save_to_csv(questions, answers):
    """Write the question/answer pairs to ``saved_files/questions_answers.csv``.

    The target directory is created if missing. The file name is fixed, so
    each call overwrites the previous export.

    Args:
        questions: Sequence of question strings (the ``Questions`` column).
        answers: Sequence of answer strings (the ``Answers`` column); must be
            the same length as ``questions``.

    Returns:
        The relative path of the CSV file that was written.
    """
    directory = "saved_files"
    os.makedirs(directory, exist_ok=True)
    file_path = os.path.join(directory, 'questions_answers.csv')

    df = pd.DataFrame({'Questions': questions, 'Answers': answers})
    # index=False keeps the row index out of the exported file.
    df.to_csv(file_path, index=False)
    return file_path


def app():
    """Render the Synthetic Data Generator page and handle one generation run.

    Draws the header (logo and title), then collects the PDF upload, topic,
    number of questions, answer type and custom conditions. When "Generate"
    is clicked with both a file and a topic supplied, the PDF text is
    extracted, question/answer pairs are generated and listed, the pairs are
    saved to CSV, and a download button for that CSV is shown.
    """
    logo_path = "C://Users//hafee//Downloads//Logo.jpeg"

    col1, col2 = st.columns([1, 3])

    with col1:
        # The logo sits at a machine-specific absolute path; skip it when the
        # file is absent so the page still renders on other machines.
        if os.path.isfile(logo_path):
            st.image(logo_path, width=150)

    with col2:
        st.markdown(
            "<h1 style='text-align: left; margin-top: -10px;'>Synthetic Data Generator</h1>",
            unsafe_allow_html=True
        )

    st.markdown("<h2 style='text-align: center;'>Your Reliable Synthetic Dataset Generation</h2>", unsafe_allow_html=True)

    # Input widgets.
    file = st.file_uploader("Drag your Content or Document (.pdf only)", type=['pdf'])
    topic = st.text_input("Topic Name", placeholder="Enter the topic name")
    num_questions = st.number_input("Number of Questions", min_value=1, max_value=100, value=5, step=1)
    answer_type = st.radio("Answer Type", options=["One-word", "Short", "Long"], index=1, horizontal=True)
    custom_conditions = st.text_area("Custom Conditions", placeholder="Enter any custom rules for the LLM...")

    generate_button = st.button("Generate")

    if not generate_button:
        return

    if not (file and topic):
        # Tell the user why nothing was generated instead of silently ignoring
        # the click.
        st.warning("Please upload a PDF and enter a topic name before generating.")
        return

    content = extract_pdf_content(file)
    questions, answers = generate_qa(content, topic, num_questions, answer_type, custom_conditions)

    st.subheader("Generated Questions and Answers")
    for i, (q, a) in enumerate(zip(questions, answers), start=1):
        st.write(f"*Q{i}:* {q}")
        st.write(f"*A{i}:* {a}")
        st.write("---")

    csv_file_path = save_to_csv(questions, answers)
    st.success(f"The CSV file has been saved to the server at: {csv_file_path}")

    # The download button is created while the file handle is still open.
    with open(csv_file_path, 'rb') as f:
        st.download_button(
            label="Download as CSV",
            data=f,
            file_name="questions_answers.csv",
            mime="text/csv"
        )

    st.write("Click the button above to download your CSV file.")


# Run the page only when this file is executed as a script (which is how
# `streamlit run` executes it), not when it is imported.
if __name__ == "__main__":
    app()