Spaces:
Build error
Build error
| import os | |
| from openai import OpenAI | |
| import time | |
| import gradio as gr | |
| import json | |
# Read the OpenAI credential from the process environment; None when unset.
# NOTE(review): the variable is spelled OPEN_API_KEY (not OPENAI_API_KEY) --
# confirm this matches the secret name configured for the deployment.
api_key = os.getenv('OPEN_API_KEY')

# Module-level OpenAI client shared by every request handler below.
client = OpenAI(api_key=api_key)
def generate_qa_pairs(chunk, num_pairs=2, max_retries=3, retry_delay=20):
    """Ask the chat model for question-answer pairs about *chunk*.

    Args:
        chunk: The text the questions and answers should be drawn from.
        num_pairs: How many pairs to request in the prompt.
        max_retries: Total number of attempts before giving up (at least 1).
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        The raw model reply as a string, expected to contain lines of the
        form ``Q: ...`` / ``A: ...``. An empty string if the reply has no
        content.

    Raises:
        RuntimeError: If every attempt failed; the last underlying error is
            chained as ``__cause__``.
    """
    prompt = f"""Given the following text, generate {num_pairs} question-answer pairs.
Ensure the questions and answers capture key ideas from the text.
Text: {chunk}
Format each pair as:
Q: [Question]
A: [Answer]
"""
    attempts = max(1, int(max_retries))
    last_error = None
    # Bounded loop instead of unbounded self-recursion: a permanent failure
    # (bad credentials, unknown model) must surface rather than retry forever.
    for attempt in range(1, attempts + 1):
        try:
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "Given the following text, generate question-answer pairs."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.8,
                max_tokens=500,
            )
            # The content field may be None; normalise so callers can always
            # treat the result as text.
            return response.choices[0].message.content or ""
        except Exception as e:  # retry boundary: any API/transport failure
            last_error = e
            print(f"An error occurred: {e}")
            if attempt < attempts:
                time.sleep(retry_delay)  # back off before the next attempt
    raise RuntimeError(
        f"Failed to generate Q&A pairs after {attempts} attempts"
    ) from last_error
def process_qa_pairs(qa_text, chunk, id_prefix="dod5000"):
    """Parse ``Q: ... A: ...`` text into a list of QA records.

    Args:
        qa_text: Raw text containing pairs formatted as ``Q: <question>``
            followed by ``A: <answer>``. ``None`` or empty text yields no
            records.
        chunk: The source text the pairs were generated from; stored as the
            ``context`` of every record.
        id_prefix: Prefix for record ids; ids are ``<prefix>-NNN`` numbered
            from 000 in order of the records actually emitted.

    Returns:
        A list of dicts with keys ``id``, ``question``, ``context`` and
        ``answers`` (a one-element list holding ``text`` and an
        ``answer_start`` of ``None``).
    """
    pairs = []
    if not qa_text:
        return pairs

    # Anything before the first "Q: " marker is preamble, not a pair.
    for segment in qa_text.split('Q: ')[1:]:
        # partition splits on the FIRST "A: " only, so an answer that itself
        # contains "A: " stays intact, and a segment without the marker is
        # detected instead of raising on tuple unpacking.
        question, marker, answer = segment.partition('A: ')
        question = question.strip()
        answer = answer.strip()
        if not marker or not question or not answer:
            continue  # malformed or incomplete pair: skip rather than crash
        pairs.append({
            "id": f"{id_prefix}-{len(pairs):03d}",
            "question": question,
            "context": chunk,
            # answer_start is not available from the generated text.
            "answers": [{"text": answer, "answer_start": None}],
        })
    return pairs
def process_document(file, num_pairs, chunk_size=500, delay=3):
    """Generate QA records for every fixed-size chunk of an uploaded text file.

    Args:
        file: Either a filesystem path or an object exposing the path as a
            ``name`` attribute (the upload component may hand over either,
            depending on its version/configuration).
        num_pairs: Number of pairs to request per chunk; coerced to ``int``
            so a float such as ``2.0`` does not leak into the prompt.
        chunk_size: Number of characters per chunk.
        delay: Seconds to pause between chunks to stay under rate limits.

    Returns:
        A list of QA record dicts. On failure the error is printed and the
        records collected so far are returned (an empty list if nothing was
        processed).
    """
    qa_pairs = []
    if file is None:
        print("An error occurred: no file was provided")
        return qa_pairs

    # Accept both a plain path and a file-like wrapper carrying `.name`.
    path = getattr(file, "name", file)

    try:
        num_pairs = int(num_pairs)
        # Explicit encoding so the result does not depend on the host locale;
        # undecodable bytes are replaced instead of aborting the whole run.
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            text = f.read()
        chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
        for i, chunk in enumerate(chunks):
            print(f"Processing chunk {i+1} of {len(chunks)}")
            pairs = generate_qa_pairs(chunk, num_pairs)
            qa_pairs.extend(process_qa_pairs(pairs, chunk, id_prefix=f"dod{i+1}000"))
            if i + 1 < len(chunks):
                time.sleep(delay)  # To avoid hitting rate limits
    except Exception as e:
        # Best-effort boundary: report the problem but keep whatever was
        # already generated instead of discarding it.
        print(f"An error occurred: {e}")
    return qa_pairs
def save_to_json(qa_pairs):
    """Write the QA records and their count to JSON files in the working directory.

    ``qa_pairs.json`` receives the records themselves and
    ``total_pairs.json`` receives ``{"total_pairs": <count>}``; both are
    overwritten and indented by four spaces.
    """
    def _write(path, payload):
        # Serialise one payload to one file, replacing previous contents.
        with open(path, 'w') as handle:
            json.dump(payload, handle, indent=4)

    _write('qa_pairs.json', qa_pairs)
    _write('total_pairs.json', {"total_pairs": len(qa_pairs)})
def main(file, num_pairs):
    """Run the full pipeline for one upload and return a summary message.

    Passes *file* and *num_pairs* to ``process_document``, persists the
    resulting records with ``save_to_json``, and reports how many records
    were produced.
    """
    generated = process_document(file, num_pairs)
    save_to_json(generated)
    total = len(generated)
    return f"Total number of Q&A pairs generated: {total}"
# Gradio UI: one file upload, a pairs-per-chunk count, and a button that runs
# `main` and shows its summary string in a label.
with gr.Blocks() as demo:
    file_input = gr.File(label="Upload Document")
    # precision=0 makes the component deliver an int; without it the value
    # arrives as a float and the prompt would ask for e.g. "2.0" pairs.
    num_pairs_input = gr.Number(
        label="Number of Pairs per Chunk",
        value=2,
        precision=0,
    )
    output_label = gr.Label(label="Output")
    gr.Button("Generate QA Pairs").click(
        main,
        inputs=[file_input, num_pairs_input],
        outputs=output_label,
    )

# Start the web server when the module is executed.
demo.launch()