Spaces:
Build error
Build error
File size: 3,075 Bytes
b4e9e1e 35c3e58 6b18659 35c3e58 d37bb88 35c3e58 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import os
from openai import OpenAI
import time
import gradio as gr
import json
# Get the API key from environment variable
# NOTE(review): 'OPEN_API_KEY' looks like a typo for the conventional
# 'OPENAI_API_KEY' — confirm which name the deployment actually sets.
api_key = os.environ.get('OPEN_API_KEY')
# Initialize the OpenAI client
# If the env var is unset, api_key is None and every API call will fail
# at request time rather than here.
client = OpenAI(api_key=api_key)
def generate_qa_pairs(chunk, num_pairs=2, max_retries=3, retry_delay=20):
    """Ask the chat model to generate Q&A pairs for one chunk of text.

    Parameters:
        chunk (str): Text to generate questions from.
        num_pairs (int): Number of Q/A pairs to request per chunk.
        max_retries (int): Maximum retries after an API error. The original
            code retried forever via unbounded recursion, which would loop
            (and eventually overflow the stack) on a persistent failure.
        retry_delay (int): Seconds to sleep between retries.

    Returns:
        str or None: Raw model output in 'Q: .../A: ...' format, or None
        if every attempt failed.
    """
    prompt = f"""Given the following text, generate {num_pairs} question-answer pairs.
Ensure the questions and answers capture key ideas from the text.
Text: {chunk}
Format each pair as:
Q: [Question]
A: [Answer]
"""
    # Bounded retry loop replaces the original unbounded recursion.
    for attempt in range(max_retries + 1):
        try:
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "Given the following text, generate question-answer pairs."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.8,
                max_tokens=500
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"An error occurred: {e}")
            if attempt < max_retries:
                time.sleep(retry_delay)  # back off before retrying
    return None  # all attempts failed
def process_qa_pairs(qa_text, chunk, id_prefix="dod5000"):
    """Parse model output in 'Q: .../A: ...' format into SQuAD-style dicts.

    Parameters:
        qa_text (str): Raw model response containing 'Q: ' / 'A: ' markers.
        chunk (str): The source text, stored as each pair's context.
        id_prefix (str): Prefix for the generated pair ids.

    Returns:
        list[dict]: One dict per parsed pair with keys
        'id', 'question', 'context', 'answers'.
    """
    pairs = []
    for i, qa in enumerate(qa_text.split('Q: ')[1:]):  # first split is preamble/empty
        # partition() splits on the FIRST 'A: ' only. The original
        # `q, a = qa.split('A: ')` raised ValueError whenever a pair was
        # malformed or the answer itself contained 'A: '.
        q, sep, a = qa.partition('A: ')
        if not sep:
            continue  # no answer marker — skip malformed pair rather than crash
        pairs.append({
            "id": f"{id_prefix}-{i:03d}",
            "question": q.strip(),
            "context": chunk,
            # answer_start is not available from the OpenAI response
            "answers": [{"text": a.strip(), "answer_start": None}]
        })
    return pairs
def process_document(file, num_pairs):
    """Read an uploaded document, split it into 500-character chunks, and
    generate Q&A pairs for every chunk.

    Parameters:
        file: Uploaded file object exposing a filesystem path via `.name`.
        num_pairs: Number of Q/A pairs to request per chunk.

    Returns:
        list[dict]: All parsed pairs, or [] if anything failed.
    """
    chunk_size = 500
    try:
        with open(file.name, 'r') as handle:
            text = handle.read()
        chunks = [text[pos:pos + chunk_size]
                  for pos in range(0, len(text), chunk_size)]
        total = len(chunks)
        collected = []
        for idx, chunk in enumerate(chunks, start=1):
            print(f"Processing chunk {idx} of {total}")
            raw = generate_qa_pairs(chunk, num_pairs)
            collected.extend(process_qa_pairs(raw, chunk, id_prefix=f"dod{idx}000"))
            time.sleep(3)  # To avoid hitting rate limits
        return collected
    except Exception as e:
        print(f"An error occurred: {e}")
        return []
def save_to_json(qa_pairs, qa_path='qa_pairs.json', count_path='total_pairs.json'):
    """Persist Q&A pairs and their count as JSON files.

    Parameters:
        qa_pairs (list): The pair dicts to write.
        qa_path (str): Destination for the pairs themselves
            (default 'qa_pairs.json', matching the original hard-coded path).
        count_path (str): Destination for the {'total_pairs': N} summary
            (default 'total_pairs.json').
    """
    # 'fh' instead of the original 'file', which shadowed the builtin.
    with open(qa_path, 'w') as fh:
        json.dump(qa_pairs, fh, indent=4)
    with open(count_path, 'w') as fh:
        json.dump({"total_pairs": len(qa_pairs)}, fh, indent=4)
def main(file, num_pairs):
    """Gradio click handler: generate pairs from the upload, persist them,
    and return a human-readable summary string."""
    pairs = process_document(file, num_pairs)
    save_to_json(pairs)
    return f"Total number of Q&A pairs generated: {len(pairs)}"
# --- Gradio UI: upload a document, pick pairs-per-chunk, run generation ---
with gr.Blocks() as demo:
    file_input = gr.File(label="Upload Document")
    num_pairs_input = gr.Number(label="Number of Pairs per Chunk", value=2)
    output_label = gr.Label(label="Output")
    generate_button = gr.Button("Generate QA Pairs")
    generate_button.click(
        main,
        inputs=[file_input, num_pairs_input],
        outputs=output_label,
    )

demo.launch()
|