ChrisSacrumCor commited on
Commit
35c3e58
·
verified ·
1 Parent(s): 8af733a

create app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ import time
3
+ import gradio as gr
4
+ import json
5
+
6
# Initialize the OpenAI client.
# SECURITY: the previous revision both referenced an undefined `api_key` name
# (NameError at import time) and leaked a real secret key in a trailing
# comment — that key must be revoked. Never hard-code credentials; read the
# key from the environment instead.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
8
+
9
def generate_qa_pairs(chunk, num_pairs=2, max_retries=3):
    """Generate question-answer pairs for one text chunk via the OpenAI API.

    Args:
        chunk: Text to generate pairs from.
        num_pairs: Number of Q/A pairs to request from the model.
        max_retries: Attempts before giving up (new, backward-compatible
            parameter — the original retried forever via unbounded recursion).

    Returns:
        The raw model response text ("Q: ... / A: ..." pairs), or "" if every
        attempt fails. Callers parse this with process_qa_pairs, which yields
        an empty list for "".
    """
    prompt = f"""Given the following text, generate {num_pairs} question-answer pairs.
    Ensure the questions and answers capture key ideas from the text.

    Text: {chunk}

    Format each pair as:
    Q: [Question]
    A: [Answer]
    """

    # Bounded retry loop: the original recursed on every error with no depth
    # limit, so a persistent failure (invalid key, network down) recursed
    # forever, sleeping 20 s per frame until the stack blew.
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "Given the following text, generate question-answer pairs."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.8,
                max_tokens=500,
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"An error occurred: {e}")
            if attempt < max_retries - 1:
                time.sleep(20)  # Wait before retrying (rate limits, transient errors)
    return ""
35
+
36
def process_qa_pairs(qa_text, chunk, id_prefix="dod5000"):
    """Parse model output of the form "Q: ... / A: ..." into SQuAD-like dicts.

    Args:
        qa_text: Raw model response containing "Q: "/"A: " delimited pairs.
        chunk: Source text the pairs came from; stored verbatim as "context".
        id_prefix: Prefix for generated ids ("<prefix>-NNN").

    Returns:
        List of dicts with keys id/question/context/answers. Malformed
        entries (no "A: " marker) are skipped instead of raising.
    """
    pairs = []
    for i, qa in enumerate(qa_text.split('Q: ')[1:]):  # first split piece is preamble/empty
        # partition() splits on the FIRST "A: " only; the original
        # `q, a = qa.split('A: ')` raised ValueError whenever the answer text
        # itself contained "A: " or the marker was missing entirely.
        q, sep, a = qa.partition('A: ')
        if not sep:
            continue  # malformed pair — skip it rather than crash the whole run
        pairs.append({
            "id": f"{id_prefix}-{i:03d}",
            "question": q.strip(),
            "context": chunk,
            "answers": [{"text": a.strip(), "answer_start": None}]  # answer_start not provided by the model
        })
    return pairs
48
+
49
def process_document(file, num_pairs):
    """Read an uploaded text file, chunk it, and generate Q/A pairs per chunk.

    Args:
        file: Uploaded file object exposing a ``.name`` path (as gradio's
            File component provides).
        num_pairs: Number of Q/A pairs to request per chunk.

    Returns:
        List of parsed Q/A pair dicts; [] if reading or processing fails.
    """
    try:
        # Explicit encoding: without it, open() uses the platform default,
        # which breaks on UTF-8 uploads under e.g. a cp1252 locale.
        with open(file.name, 'r', encoding='utf-8') as f:
            text = f.read()
        chunks = [text[i:i + 500] for i in range(0, len(text), 500)]  # fixed 500-char chunks
        qa_pairs = []
        for i, chunk in enumerate(chunks):
            print(f"Processing chunk {i+1} of {len(chunks)}")
            pairs = generate_qa_pairs(chunk, num_pairs)
            qa_pairs.extend(process_qa_pairs(pairs, chunk, id_prefix=f"dod{i+1}000"))
            time.sleep(3)  # To avoid hitting rate limits
        return qa_pairs
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and one bad upload
        # should produce an empty result rather than crash the app.
        print(f"An error occurred: {e}")
        return []
64
+
65
def save_to_json(qa_pairs):
    """Write the pairs to qa_pairs.json and their count to total_pairs.json.

    Args:
        qa_pairs: List of Q/A pair dicts (as produced by process_qa_pairs).
    """
    # Explicit UTF-8 + ensure_ascii=False: json.dump otherwise escapes every
    # non-ASCII character, and open() would use the platform default encoding.
    with open('qa_pairs.json', 'w', encoding='utf-8') as file:
        json.dump(qa_pairs, file, indent=4, ensure_ascii=False)
    with open('total_pairs.json', 'w', encoding='utf-8') as file:
        json.dump({"total_pairs": len(qa_pairs)}, file, indent=4)
70
+
71
def main(file, num_pairs):
    """Run the full pipeline for one upload and report how many pairs were made.

    Args:
        file: Uploaded file object (gradio File component output).
        num_pairs: Q/A pairs to request per chunk.

    Returns:
        A human-readable summary string for the UI label.
    """
    generated = process_document(file, num_pairs)
    save_to_json(generated)
    return f"Total number of Q&A pairs generated: {len(generated)}"
75
+
76
# Gradio front end: upload a document, choose pairs-per-chunk, run the pipeline.
with gr.Blocks() as demo:
    doc_upload = gr.File(label="Upload Document")
    pairs_per_chunk = gr.Number(label="Number of Pairs per Chunk", value=2)
    result_display = gr.Label(label="Output")

    generate_btn = gr.Button("Generate QA Pairs")
    generate_btn.click(
        main,
        inputs=[doc_upload, pairs_per_chunk],
        outputs=result_display,
    )

demo.launch()