# %% [markdown]
# Build an evaluation model to assess the output of the current model:
# 1. Ask an LLM to evaluate an answer
# 2. Rerun if the answer fails the evaluation
# 3. Incorporate the loop into a workflow

# %% Imports and configuration
from dotenv import load_dotenv
import os
from pypdf import PdfReader
import google.generativeai as genai
import gradio as gr
from pydantic import BaseModel
import json

load_dotenv(override=True)
genai.configure(api_key=os.getenv("GEMINI_API"))

# %% Read the LinkedIn PDF and the pre-written summary used as grounding context.
reader = PdfReader("../Week_1/Data_w1/linkedin.pdf")
linkedin = ""
for page in reader.pages:
    # FIX: extract_text() can return None for image-only pages; guard with `or ""`
    # so concatenation never raises TypeError.
    linkedin += page.extract_text() or ""

with open("../Week_1/Data_w1/summary.txt", "r") as f:
    summary = f.read()

# %% System prompt for the persona agent.
initial_system_prompt = (
    "You are acting as Ed Donner. You are answering questions on Ed Donner's website, "
    "particularly questions related to Ed Donner's career, background, skills and experience. "
    "Your responsibility is to represent Ed Donner for interactions on the website as faithfully as possible. "
    "You are given a summary of Ed Donner's background and LinkedIn profile which you can use to answer questions. "
    "Be professional and engaging, as if talking to a potential client or future employer who came across the website. "
    "If you don't know the answer, say so."
)
initial_system_prompt += f"\n\n## Summary:\n{summary}\n\n## LinkedIn Profile:\n{linkedin}\n\n"
initial_system_prompt += "With this context, please chat with the user, always staying in character as Ed Donner."

# %% System prompt for the evaluator model.
evaluator_system_prompt = (
    "You are an evaluator that decides whether a response to a question is acceptable. "
    "You are provided with a conversation between a User and an Agent. Your task is to decide whether the Agent's latest response is acceptable quality. "
    "The Agent is playing the role of Ed Donner and is representing Ed Donner on their website. "
    "The Agent has been instructed to be professional and engaging, as if talking to a potential client or future employer who came across the website. "
    "The Agent has been provided with context on Ed Donner in the form of their summary and LinkedIn details. "
    "Here's the information:"
)
evaluator_system_prompt += f"\n\n## Summary:\n{summary}\n\n## LinkedIn Profile:\n{linkedin}\n\n"
evaluator_system_prompt += (
    "With this context, please evaluate the latest response, replying with whether the response is acceptable and your feedback. "
    # FIX: evaluate_response() parses the reply with json.loads, but the original
    # prompt never asked for JSON — spell out the exact schema expected.
    'Reply ONLY with a JSON object of the form {"is_acceptable": true or false, "response": "<your feedback>"}.'
)


def evaluator_user_prompt(reply, message, history):
    """Build the user-turn prompt asking the evaluator to judge `reply`.

    Args:
        reply: the Agent's latest response to be judged.
        message: the latest User message that prompted `reply`.
        history: the prior conversation (any printable representation).

    Returns:
        A single prompt string for the evaluator model.
    """
    user_prompt = f"Here's the conversation between the User and the Agent: \n\n{history}\n\n"
    user_prompt += f"Here's the latest message from the User: \n\n{message}\n\n"
    user_prompt += f"Here's the latest response from the Agent: \n\n{reply}\n\n"
    user_prompt += "Please evaluate the response, replying with whether it is acceptable and your feedback."
    return user_prompt


# %% Structured result of a single evaluation.
class Evaluation(BaseModel):
    is_acceptable: bool  # True when the Agent's reply passes review
    response: str        # the evaluator's feedback text


# %% Model used for evaluation.
# FIX: request JSON output directly so evaluate_response's json.loads succeeds
# instead of routinely falling through to the heuristic text parser.
model_evaluator = genai.GenerativeModel(
    'gemini-2.0-flash-exp',
    system_instruction=evaluator_system_prompt,
    generation_config={"response_mime_type": "application/json"},
)


# %% Evaluate one reply.
def evaluate_response(reply, message, history) -> Evaluation:
    """Ask the evaluator model whether `reply` is acceptable.

    Fails open: any error yields is_acceptable=True so the chat keeps working
    even when the evaluator is unavailable; the error is recorded as feedback.

    Args:
        reply: the Agent's latest response.
        message: the latest User message.
        history: prior conversation passed through to the evaluator prompt.

    Returns:
        An Evaluation with the verdict and the evaluator's feedback.
    """
    try:
        eval_prompt = evaluator_user_prompt(reply, message, history)
        response = model_evaluator.generate_content(eval_prompt)

        # Models often wrap JSON in ```json ... ``` fences — strip them first.
        raw = response.text.strip()
        if raw.startswith("```"):
            raw = raw.strip("`").strip()
            if raw.lower().startswith("json"):
                raw = raw[4:]

        try:
            eval_data = json.loads(raw)
            return Evaluation(
                is_acceptable=eval_data.get("is_acceptable", True),
                response=eval_data.get("response", "No response provided."),
            )
        except json.JSONDecodeError:
            # Heuristic fallback on free text.
            # FIX: the original tested `"acceptable" in text`, which also matches
            # "not acceptable" / "unacceptable" and so effectively always passed;
            # check the negative forms first.
            text = response.text.lower()
            negative = (
                "not acceptable" in text
                or "unacceptable" in text
                or "false" in text
            )
            is_acceptable = not negative and ("true" in text or "acceptable" in text)
            return Evaluation(is_acceptable=is_acceptable, response=response.text)
    except Exception as e:
        # Fail open with the error recorded as feedback.
        return Evaluation(is_acceptable=True, response=f"Evaluation failed: {str(e)}")


# %% Main chat with an evaluate-and-retry loop.
def chat(message, history, system_prompt=initial_system_prompt):
    """Generate a persona reply, retrying up to 3 times until it passes evaluation.

    Args:
        message: latest user message.
        history: Gradio 'messages'-format history (dicts with role/content keys).
        system_prompt: system instruction for the persona model.

    Returns:
        The accepted reply; or the last attempt annotated with the evaluator's
        feedback; or an error string after 3 failed tries.
    """
    model = genai.GenerativeModel(
        'gemini-2.0-flash',
        system_instruction=system_prompt,
    )

    # Convert Gradio messages format to Gemini format
    # (Gemini uses role "model" where Gradio uses "assistant").
    gemini_history = []
    for msg in history:
        if msg["role"] == "user":
            gemini_history.append({"role": "user", "parts": [msg["content"]]})
        elif msg["role"] == "assistant":
            gemini_history.append({"role": "model", "parts": [msg["content"]]})

    chat_session = model.start_chat(history=gemini_history)

    # Retry up to 3 times if the reply fails evaluation.
    for try_count in range(3):
        try:
            response = chat_session.send_message(message).text

            evaluation = evaluate_response(response, message, history)
            if evaluation.is_acceptable:
                print("Passed evaluation - returning reply")
                return response

            print("Failed evaluation - retrying")
            if try_count < 2:
                # Fold the evaluator's feedback into the next attempt and start
                # a fresh session so the rejected reply isn't left in context.
                message = (
                    f"{message}\n\nPlease provide a better response. "
                    f"Previous attempt had issues: {evaluation.response}"
                )
                chat_session = model.start_chat(history=gemini_history)
            else:
                return f"{response}\n\n*[Note: Response may need improvement - {evaluation.response}]*"
        except Exception as e:
            # Transient errors: loop again; on the final try, surface the error.
            if try_count == 2:
                return f"Error: {str(e)} after 3 tries"
    return "Failed to generate acceptable response after maximum retries."


# %% Gradio interface with the system prompt exposed as an additional input.
with gr.Blocks() as demo:
    gr.Markdown("# Chat with Google Gemini")

    system_prompt = gr.Textbox(
        value=initial_system_prompt,
        label="System Prompt",
        placeholder="Enter system instructions for the AI...",
        lines=2,
    )

    chat_interface = gr.ChatInterface(
        fn=chat,
        additional_inputs=[system_prompt],
        title="",
        cache_examples=False,
        type='messages',
    )
" ], "text/plain": [ "