Spaces:

ChrisSacrumCor
/

QApairs

Build error

App Files Community

ChrisSacrumCor committed on Mar 17, 2025

Commit

35c3e58

verified ·

1 Parent(s): 8af733a

create app.py

Browse files

Files changed (1) hide show

app.py +87 -0

app.py ADDED Viewed

	@@ -0,0 +1,87 @@

+from openai import OpenAI
+import time
+import gradio as gr
+import json
+# Initialize the OpenAI client.
+# No key is passed here on purpose: the OpenAI client falls back to the
+# OPENAI_API_KEY environment variable (on a hosted Space, configure it as a
+# secret). Never hard-code or commit a key.
+# NOTE(security): the key that used to be pasted on this line is exposed in the
+# commit history and must be revoked.
+client = OpenAI()
+def generate_qa_pairs(chunk, num_pairs=2, max_retries=3, retry_delay=20):
+    """Ask the chat model for question-answer pairs about ``chunk``.
+
+    Args:
+        chunk: Source text the questions and answers should be drawn from.
+        num_pairs: Number of pairs requested in the prompt.
+        max_retries: Total number of attempts before giving up; at least one
+            attempt is always made.
+        retry_delay: Seconds to wait between failed attempts.
+
+    Returns:
+        The message content of the first completion choice. The prompt asks
+        the model to format it as ``Q: ...`` / ``A: ...`` lines.
+
+    Raises:
+        Exception: The last API error, re-raised once every attempt has failed.
+    """
+    prompt = f"""Given the following text, generate {num_pairs} question-answer pairs.
+    Ensure the questions and answers capture key ideas from the text.
+    Text: {chunk}
+    Format each pair as:
+    Q: [Question]
+    A: [Answer]
+    """
+    attempts = max(1, max_retries)
+    # A bounded loop replaces the former unbounded recursive retry, which never
+    # terminated on permanent failures such as an invalid API key.
+    for attempt in range(1, attempts + 1):
+        try:
+            response = client.chat.completions.create(
+                model="gpt-3.5-turbo",
+                messages=[
+                    {"role": "system", "content": "Given the following text, generate question-answer pairs."},
+                    {"role": "user", "content": prompt}
+                ],
+                temperature=0.8,
+                max_tokens=500
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            if attempt == attempts:
+                # Out of attempts: surface the failure to the caller.
+                raise
+            time.sleep(retry_delay)  # Back off before the next attempt
+def process_qa_pairs(qa_text, chunk, id_prefix="dod5000"):
+    """Parse ``Q: ... A: ...`` text into a list of question-answer records.
+
+    Args:
+        qa_text: Text containing pairs introduced by ``Q: `` and ``A: ``.
+            ``None`` or an empty string yields an empty list.
+        chunk: The source text, stored as each record's ``context``.
+        id_prefix: Prefix for record ids; the id is the prefix plus the
+            zero-padded position of the ``Q: `` segment in ``qa_text``.
+
+    Returns:
+        A list of dicts with ``id``, ``question``, ``context`` and ``answers``
+        keys. Segments lacking a question or an answer are skipped.
+    """
+    if not qa_text:
+        return []
+    pairs = []
+    # Everything before the first 'Q: ' is preamble, not a question.
+    segments = qa_text.split('Q: ')[1:]
+    for i, segment in enumerate(segments):
+        # partition splits on the first 'A: ' only: an answer that itself
+        # contains 'A: ' stays intact, and a segment with no answer is detected
+        # through the empty separator instead of raising ValueError.
+        question, separator, answer = segment.partition('A: ')
+        question = question.strip()
+        answer = answer.strip()
+        if not separator or not question or not answer:
+            continue
+        pairs.append({
+            "id": f"{id_prefix}-{i:03d}",
+            "question": question,
+            "context": chunk,
+            # answer_start is not directly available from the OpenAI response
+            "answers": [{"text": answer, "answer_start": None}]
+        })
+    return pairs
+def process_document(file, num_pairs, chunk_size=500):
+    """Read a text document, split it into chunks and collect Q&A pairs.
+
+    Args:
+        file: Either a path string or an object whose ``name`` attribute is
+            the path of the uploaded file.
+        num_pairs: Number of pairs to request for every chunk.
+        chunk_size: Chunk length in characters.
+
+    Returns:
+        The list of records gathered from all processed chunks. An unreadable
+        or missing file yields an empty list. A failure on a later chunk keeps
+        the records already collected.
+
+    Raises:
+        ValueError: If ``chunk_size`` is not positive.
+    """
+    if chunk_size <= 0:
+        raise ValueError("chunk_size must be a positive integer")
+    if file is None:
+        print("An error occurred: no file was provided")
+        return []
+    # Accept both a plain path and an upload object exposing `.name`.
+    path = getattr(file, "name", file)
+    try:
+        # Explicit encoding so the result does not depend on the host locale.
+        with open(path, 'r', encoding='utf-8') as f:
+            text = f.read()
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return []
+    # The file is closed before the slow API calls below.
+    chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
+    total = len(chunks)
+    qa_pairs = []
+    for i, chunk in enumerate(chunks, start=1):
+        print(f"Processing chunk {i} of {total}")
+        try:
+            pairs = generate_qa_pairs(chunk, num_pairs)
+            qa_pairs.extend(process_qa_pairs(pairs, chunk, id_prefix=f"dod{i}000"))
+        except ValueError as e:
+            # A reply that cannot be parsed only costs this one chunk.
+            print(f"An error occurred: {e}")
+        except Exception as e:
+            # Any other failure stops processing but keeps earlier results.
+            print(f"An error occurred: {e}")
+            break
+        if i < total:
+            time.sleep(3)  # To avoid hitting rate limits
+    return qa_pairs
+def save_to_json(qa_pairs):
+    """Write the pairs and their count to JSON files in the working directory.
+
+    ``qa_pairs.json`` receives the full list and ``total_pairs.json`` receives
+    a single-key object holding the number of pairs. Both use 4-space indents.
+    """
+    # Full list of generated pairs.
+    with open('qa_pairs.json', 'w') as pairs_out:
+        json.dump(qa_pairs, pairs_out, indent=4)
+
+    # Companion file holding only the pair count.
+    with open('total_pairs.json', 'w') as count_out:
+        summary = {"total_pairs": len(qa_pairs)}
+        json.dump(summary, count_out, indent=4)
+def main(file, num_pairs):
+    """Generate pairs for the uploaded file, save them and report the total.
+
+    Args:
+        file: The uploaded document as delivered by the file input.
+        num_pairs: Requested pairs per chunk. This may arrive as a float, or
+            as None when the number field is empty.
+
+    Returns:
+        A status message stating how many pairs were generated.
+    """
+    # Coerce to a whole number of at least 1 so the prompt never asks for
+    # "2.0" or "None" pairs. An empty field falls back to the UI default of 2.
+    num_pairs = 2 if num_pairs is None else max(1, int(num_pairs))
+    qa_pairs = process_document(file, num_pairs)
+    save_to_json(qa_pairs)
+    return f"Total number of Q&A pairs generated: {len(qa_pairs)}"
+# Gradio UI: a file upload, a pairs-per-chunk field and a status label.
+with gr.Blocks() as demo:
+    file_input = gr.File(label="Upload Document")
+    # precision=0 makes the field deliver an int instead of a float, so the
+    # handler receives a whole number of pairs.
+    num_pairs_input = gr.Number(label="Number of Pairs per Chunk", value=2, precision=0)
+    output_label = gr.Label(label="Output")
+    generate_button = gr.Button("Generate QA Pairs")
+    # Clicking the button runs main() and shows its message in the label.
+    generate_button.click(
+        main,
+        inputs=[file_input, num_pairs_input],
+        outputs=output_label
+    )
+# Start the web app when the script is executed.
+demo.launch()