| | |
| |
|
| | import gradio as gr |
| | import json |
| | import aisuite as ai |
| | import os |
| | from dotenv import load_dotenv |
| | load_dotenv() |
| |
|
| | |
# load_dotenv() above has already run, so os.getenv sees anything it loaded.
# Only re-export the key when it is actually set: os.environ values must be
# strings, and assigning None raises TypeError, which used to abort the import
# with an unhelpful message whenever the key was not configured.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if _openai_api_key:
    os.environ["OPENAI_API_KEY"] = _openai_api_key

# Shared aisuite client used by every LLM call in this module.
client = ai.Client()
| |
|
def generate_draft(topic: str, model: str = "openai:gpt-4o") -> str:
    """Ask the model for a first-draft argumentative essay on ``topic``.

    Args:
        topic: Essay question or statement; interpolated verbatim into the prompt.
        model: Model identifier passed straight to the aisuite client.

    Returns:
        The message content of the first completion choice.
    """
    # The arrows in requirement 1 were mis-encoded; they are restored to "→".
    prompt = f"""You are an expert essay writer with strong analytical skills.

TASK: Write a compelling argumentative essay on the following topic:
"{topic}"

REQUIREMENTS:
1. Structure: Introduction with clear thesis → 3 body paragraphs → Conclusion
2. Each body paragraph should have: claim, evidence/reasoning, and connection to thesis
3. Address at least one counterargument and refute it
4. Use clear transitions between paragraphs
5. Aim for 500 words

Write the complete essay now:"""

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=1.0,
    )
    return response.choices[0].message.content
| |
|
| |
|
def reflect_on_draft(draft: str, model: str = "openai:o4-mini") -> str:
    """Request a dimension-by-dimension critique of ``draft``.

    Args:
        draft: Essay text to be reviewed; embedded verbatim in the prompt.
        model: Model identifier passed straight to the aisuite client.

    Returns:
        The message content of the first completion choice.
    """
    critique_request = f"""You are a rigorous writing instructor providing constructive feedback.

ESSAY TO REVIEW:
\"\"\"
{draft}
\"\"\"

Analyze this essay across these dimensions and provide specific, actionable feedback:

1. **THESIS CLARITY**: Is the main argument clear and specific?
2. **ARGUMENT STRENGTH**: Are claims well-supported? Is reasoning logical?
3. **EVIDENCE QUALITY**: Are examples concrete and relevant?
4. **STRUCTURE**: Does the organization flow logically?
5. **COUNTERARGUMENTS**: Are opposing views addressed fairly?
6. **STYLE & CLARITY**: Is the writing concise and clear?

For each dimension, identify what works well and what needs improvement.
End with your TOP 3 PRIORITY improvements."""

    conversation = [{"role": "user", "content": critique_request}]
    completion = client.chat.completions.create(
        model=model,
        messages=conversation,
        temperature=1.0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
| |
|
| |
|
def revise_draft(original_draft: str, reflection: str, model: str = "openai:gpt-4o") -> str:
    """Rewrite ``original_draft`` so that it addresses the points in ``reflection``.

    Args:
        original_draft: Essay text to be revised; embedded verbatim in the prompt.
        reflection: Feedback text to apply; embedded verbatim in the prompt.
        model: Model identifier passed straight to the aisuite client.

    Returns:
        The message content of the first completion choice.
    """
    revision_request = f"""You are an expert editor tasked with improving an essay based on feedback.

ORIGINAL ESSAY:
\"\"\"
{original_draft}
\"\"\"

FEEDBACK RECEIVED:
\"\"\"
{reflection}
\"\"\"

REVISION INSTRUCTIONS:
1. Address EACH piece of feedback
2. Strengthen the thesis if unclear
3. Add concrete evidence where suggested
4. Improve transitions between paragraphs
5. Ensure counterarguments are properly addressed
6. Fix any awkward phrasing

IMPORTANT:
- Return ONLY the complete revised essay
- The revised essay MUST be at least 400 words
- If the original is unclear, expand it into a full essay"""

    conversation = [{"role": "user", "content": revision_request}]
    completion = client.chat.completions.create(
        model=model,
        messages=conversation,
        temperature=1.0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
| |
|
| |
|
| | |
| | |
| | |
| |
|
# Binary rubric: criterion key -> the condition the evaluator is asked to check.
# Insertion order matters: it fixes the numbering of the criteria in the
# evaluation prompt.
EVALUATION_CRITERIA = {
    # Thesis
    "thesis_clear": "The thesis is stated in ONE clear sentence",
    "thesis_debatable": "The thesis makes a debatable claim",
    "thesis_specific": "The thesis is narrow and focused",
    "thesis_positioned": "The thesis appears in the introduction",
    # Argument depth
    "multiple_reasons": "At least 3 DISTINCT supporting reasons",
    "reasons_developed": "Each reason is explained in depth",
    "logical_progression": "Arguments build on each other",
    "no_logical_fallacies": "No logical fallacies present",
    # Evidence quality
    "concrete_examples": "Specific real-world examples included",
    "named_sources": "References specific studies/stats/experts by name",
    "evidence_explained": "Evidence is analyzed, not just dropped in",
    "varied_evidence": "Multiple types of evidence used",
    # Counterarguments
    "counter_acknowledged": "At least one opposing view stated",
    "counter_steelmanned": "Counterargument presented fairly",
    "counter_refuted": "Substantive rebuttal provided",
    # Structure
    "hook_present": "Introduction has an engaging hook",
    "topic_sentences": "Each paragraph has a topic sentence",
    "smooth_transitions": "Varied transitions between paragraphs",
    "strong_conclusion": "Conclusion synthesizes (not just summarizes)",
    # Writing quality
    "sentence_variety": "Varied sentence structures",
    "precise_language": "Precise word choices (no vague terms)",
    "no_repetition": "No excessive repetition",
    "active_voice": "Predominantly active voice",
    "no_filler": "No filler phrases",
}
| |
|
| |
|
def _parse_evaluation_json(content) -> dict:
    """Decode the evaluator's reply into a dict, tolerating text around the JSON."""
    content = content or ""
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError:
        # Models often wrap JSON in prose or code fences; retry on the
        # outermost {...} span, but only if such a span actually exists.
        start, end = content.find("{"), content.rfind("}")
        if start == -1 or end < start:
            raise json.JSONDecodeError(
                "No JSON object found in evaluator response", content, 0
            ) from None
        parsed = json.loads(content[start:end + 1])
    if not isinstance(parsed, dict):
        raise json.JSONDecodeError(
            "Evaluator response is not a JSON object", content, 0
        )
    return parsed


def _as_binary_score(value) -> int:
    """Collapse a model-supplied score to 0 or 1."""
    if isinstance(value, str):
        # "0" and "false" are truthy as strings, so compare the text instead.
        return 1 if value.strip().lower() in {"1", "true", "yes"} else 0
    return 1 if value else 0


def evaluate_essay(essay: str, model: str = "openai:gpt-4o") -> dict:
    """Score ``essay`` against every entry in ``EVALUATION_CRITERIA``.

    Args:
        essay: Essay text to grade; embedded verbatim in the prompt.
        model: Model identifier passed straight to the aisuite client.

    Returns:
        A dict with one 0/1 entry per key of ``EVALUATION_CRITERIA`` (criteria
        the model omitted count as 0, unknown keys are dropped) plus
        ``total_score``, ``max_score`` and ``percentage`` (rounded to 0.1).

    Raises:
        json.JSONDecodeError: If no JSON object can be extracted from the reply.
    """
    criteria_text = "\n".join(
        f'{index}. "{name}": {description}'
        for index, (name, description) in enumerate(EVALUATION_CRITERIA.items(), start=1)
    )
    # Build the response template from the rubric so the two cannot drift apart.
    json_template = ",\n".join(f'"{name}": 0 or 1' for name in EVALUATION_CRITERIA)

    prompt = f"""You are an EXTREMELY strict essay evaluator.

ESSAY:
\"\"\"
{essay}
\"\"\"

CRITERIA (1 ONLY if FULLY met, otherwise 0):
{criteria_text}

RULES: Be HARSH. A typical draft should score 40-60%. If unsure, score 0.

Respond in this JSON format:
{{
{json_template}
}}

Return ONLY valid JSON."""

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )

    raw_scores = _parse_evaluation_json(response.choices[0].message.content)

    # Keep exactly the rubric keys so downstream formatters never hit a
    # missing criterion and model-invented keys cannot shadow the summary fields.
    result = {
        name: _as_binary_score(raw_scores.get(name, 0))
        for name in EVALUATION_CRITERIA
    }

    total = sum(result.values())
    result["total_score"] = total
    result["max_score"] = len(EVALUATION_CRITERIA)
    result["percentage"] = round(100 * total / result["max_score"], 1)

    return result
| |
|
| |
|
def format_evaluation(eval_result: dict) -> str:
    """Render one evaluation as a per-category checklist followed by a total line.

    Args:
        eval_result: Mapping of criterion key to 0/1, optionally carrying
            ``total_score``, ``max_score`` and ``percentage``. Criteria that
            are absent are shown as failed; absent summary fields are
            recomputed from the criteria listed here.

    Returns:
        A newline-joined text block: one row per category, a dashed rule, and
        a ``TOTAL`` line.
    """
    categories = {
        "Thesis": ["thesis_clear", "thesis_debatable", "thesis_specific", "thesis_positioned"],
        "Argument Depth": ["multiple_reasons", "reasons_developed", "logical_progression", "no_logical_fallacies"],
        "Evidence Quality": ["concrete_examples", "named_sources", "evidence_explained", "varied_evidence"],
        "Counterarguments": ["counter_acknowledged", "counter_steelmanned", "counter_refuted"],
        "Structure": ["hook_present", "topic_sentences", "smooth_transitions", "strong_conclusion"],
        "Writing Quality": ["sentence_variety", "precise_language", "no_repetition", "active_voice", "no_filler"],
    }

    lines = []
    earned_total = 0
    possible_total = 0
    for cat, criteria in categories.items():
        # .get() so a criterion the evaluator omitted counts as failed
        # instead of raising KeyError.
        marks = [1 if eval_result.get(name) else 0 for name in criteria]
        score = sum(marks)
        max_score = len(criteria)
        earned_total += score
        possible_total += max_score
        # Distinct pass/fail glyphs; the previous ones were mis-encoded and
        # rendered almost identically.
        checks = " ".join("✅" if mark else "❌" for mark in marks)
        lines.append(f"{cat:<18} {checks} ({score}/{max_score})")

    total = eval_result.get("total_score", earned_total)
    maximum = eval_result.get("max_score", possible_total)
    percentage = eval_result.get(
        "percentage", round(100 * total / maximum, 1) if maximum else 0.0
    )

    lines.append("-" * 50)
    lines.append(f"TOTAL: {total}/{maximum} ({percentage}%)")

    return "\n".join(lines)
| |
|
| |
|
def format_comparison(draft_eval: dict, revised_eval: dict) -> str:
    """Render a draft-vs-revision score table and list the criteria that were fixed.

    Args:
        draft_eval: Evaluation of the first draft (criterion key to 0/1, plus
            ``percentage``).
        revised_eval: Evaluation of the revised essay, same shape.

    Returns:
        A newline-joined text block: header, one row per category, and, when
        applicable, the percentage improvement and the criteria that went
        from 0 to 1. Criteria missing from either evaluation count as 0.

    Raises:
        KeyError: If either evaluation lacks ``percentage``.
    """
    categories = {
        "Thesis": ["thesis_clear", "thesis_debatable", "thesis_specific", "thesis_positioned"],
        "Argument Depth": ["multiple_reasons", "reasons_developed", "logical_progression", "no_logical_fallacies"],
        "Evidence Quality": ["concrete_examples", "named_sources", "evidence_explained", "varied_evidence"],
        "Counterarguments": ["counter_acknowledged", "counter_steelmanned", "counter_refuted"],
        "Structure": ["hook_present", "topic_sentences", "smooth_transitions", "strong_conclusion"],
        "Writing Quality": ["sentence_variety", "precise_language", "no_repetition", "active_voice", "no_filler"],
    }

    lines = [
        f"{'Category':<18} {'Draft':>10} {'Revised':>10} {'Change':>10}",
        "=" * 50,
    ]

    fixed = []
    for cat, criteria in categories.items():
        d = sum(draft_eval.get(c, 0) for c in criteria)
        r = sum(revised_eval.get(c, 0) for c in criteria)
        mx = len(criteria)
        ch = r - d
        ch_str = f"+{ch}" if ch > 0 else str(ch)
        # Format the "n/m" cells first, then right-align them. Previously the
        # ":>8" spec sat outside the braces and was printed literally.
        draft_cell = f"{d}/{mx}"
        revised_cell = f"{r}/{mx}"
        lines.append(f"{cat:<18} {draft_cell:>10} {revised_cell:>10} {ch_str:>10}")

        # Criteria that failed in the draft and pass in the revision.
        fixed.extend(
            c.replace("_", " ").title()
            for c in criteria
            if draft_eval.get(c, 0) == 0 and revised_eval.get(c, 0) == 1
        )

    lines.append("=" * 50)

    imp = revised_eval["percentage"] - draft_eval["percentage"]
    if imp > 0:
        lines.append(f"\n📈 IMPROVEMENT: +{imp:.1f}%")
    if fixed:
        lines.append("\n✅ Fixed criteria:")
        for c in fixed:
            lines.append(f"  • {c}")

    return "\n".join(lines)
| |
|
| |
|
| | |
| | |
| | |
| |
|
def run_reflection_workflow(essay_prompt: str, progress=gr.Progress()):
    """Run draft -> reflect -> revise -> evaluate and return every intermediate output.

    Args:
        essay_prompt: Essay topic entered in the UI.
        progress: Gradio progress tracker; the ``gr.Progress()`` default is how
            Gradio is asked to inject a tracker for the event handler.

    Returns:
        A 6-tuple ``(draft, feedback, revised, draft_eval_text,
        revised_eval_text, comparison_text)``, in the order the UI outputs
        are wired.

    Raises:
        gr.Error: If ``essay_prompt`` is empty or whitespace only.
    """
    # Reject a blank prompt up front; otherwise five model calls are spent on
    # an essay about nothing.
    topic = (essay_prompt or "").strip()
    if not topic:
        raise gr.Error("Please enter an essay prompt before running the workflow.")

    progress(0.1, desc="📝 Generating draft...")
    draft = generate_draft(topic)

    progress(0.3, desc="🧐 Reflecting on draft...")
    feedback = reflect_on_draft(draft)

    progress(0.5, desc="✍️ Revising draft...")
    revised = revise_draft(draft, feedback)

    progress(0.7, desc="📊 Evaluating draft...")
    draft_eval = evaluate_essay(draft)

    progress(0.85, desc="📊 Evaluating revision...")
    revised_eval = evaluate_essay(revised)

    progress(1.0, desc="✅ Complete!")

    # Turn the raw score dicts into display text for the evaluation tab.
    draft_eval_text = format_evaluation(draft_eval)
    revised_eval_text = format_evaluation(revised_eval)
    comparison_text = format_comparison(draft_eval, revised_eval)

    return draft, feedback, revised, draft_eval_text, revised_eval_text, comparison_text
| |
|
| |
|
| | |
| | |
| | |
| |
|
# UI layout: prompt row on top, one tab per workflow step below.
# Mis-encoded emoji, arrows and dashes in the labels have been restored.
with gr.Blocks(title="Reflective Writing Agent") as demo:

    gr.Markdown("""
    # 🤖 Reflective Writing Agent
    ### An Agentic AI Workflow: Draft → Reflect → Revise → Evaluate

    Enter an essay prompt and watch the AI write, critique, and improve an essay —
    with quantified evaluation showing exactly what improved.
    """)

    with gr.Row():
        with gr.Column(scale=3):
            prompt_input = gr.Textbox(
                label="Essay Prompt",
                placeholder="e.g., Should social media platforms be regulated by the government?",
                lines=2
            )
        with gr.Column(scale=1):
            run_btn = gr.Button("🚀 Run Workflow", variant="primary", size="lg")

    gr.Markdown("---")

    with gr.Tabs():
        with gr.TabItem("📝 Step 1: Draft"):
            draft_output = gr.Textbox(label="Initial Draft", lines=15)

        with gr.TabItem("🧐 Step 2: Reflection"):
            feedback_output = gr.Textbox(label="Feedback & Critique", lines=15)

        with gr.TabItem("✍️ Step 3: Revision"):
            revised_output = gr.Textbox(label="Revised Essay", lines=15)

        with gr.TabItem("📊 Evaluation"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Draft Evaluation")
                    draft_eval_output = gr.Textbox(label="", lines=10)
                with gr.Column():
                    gr.Markdown("### Revised Evaluation")
                    revised_eval_output = gr.Textbox(label="", lines=10)

            gr.Markdown("### 📈 Comparison")
            comparison_output = gr.Textbox(label="", lines=12)

    # The output order must match the tuple returned by run_reflection_workflow.
    run_btn.click(
        fn=run_reflection_workflow,
        inputs=[prompt_input],
        outputs=[draft_output, feedback_output, revised_output,
                 draft_eval_output, revised_eval_output, comparison_output]
    )

    # Clickable sample prompts that populate the prompt box.
    gr.Examples(
        examples=[
            ["Should social media platforms be regulated by the government?"],
            ["Is artificial intelligence a threat to human employment?"],
            ["Should college education be free for all students?"],
            ["Are electric vehicles the solution to climate change?"],
        ],
        inputs=prompt_input
    )
| |
|
| | |
# Script entry point: start the Gradio server only when run directly.
if __name__ == "__main__":
    # NOTE(review): share=True requests a public share link, so anyone with
    # the URL can trigger model calls billed to the configured API key.
    soft_theme = gr.themes.Soft()
    demo.launch(theme=soft_theme, share=True)