DrSyedFaizan committed on
Commit
07b1dc9
·
verified ·
1 Parent(s): 0552035

Delete eval.py

Browse files
Files changed (1) hide show
  1. eval.py +0 -198
eval.py DELETED
@@ -1,198 +0,0 @@
1
- import os
2
- from dotenv import load_dotenv
3
- import openai
4
- from gradio_client import Client
5
-
6
# Load the OpenAI API key from the environment (after reading the .env file).
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # os.getenv returns None when the variable is unset; slicing None in the
    # print below would otherwise fail with an opaque TypeError.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to the environment or the .env file."
    )
# Show only a short prefix and suffix so the full key is never printed.
print(f"Using OpenAI API Key: {api_key[:5]}****{api_key[-3:]}")
openai.api_key = api_key
11
-
12
- # ---- STEP 1: Load First Aid Contextual Data ----
13
- from langchain_community.document_loaders import ArxivLoader
14
- from langchain.text_splitter import RecursiveCharacterTextSplitter
15
- from langchain_openai import OpenAIEmbeddings
16
- from langchain_community.vectorstores import Chroma
17
- from langchain.prompts import ChatPromptTemplate
18
- from langchain_openai import ChatOpenAI
19
- import wandb
20
- import pandas as pd
21
-
22
# Load medical and first-aid papers from arXiv to serve as retrieval context.
first_aid_docs = ArxivLoader(query="first aid treatment", load_max_docs=5).load()

# Split the papers into small chunks for indexing. The overlap is set
# explicitly: the splitter's default overlap (200 characters) is sized for a
# much larger chunk size, and with 250-character chunks it would make
# neighbouring chunks almost identical, so the top-k hits below would largely
# repeat one another.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=25)
docs = text_splitter.split_documents(first_aid_docs)

# Embed the chunks into a Chroma vector store and expose a top-2 retriever.
vectorstore = Chroma.from_documents(docs, OpenAIEmbeddings())
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
32
-
33
# ---- Define First Aid Questions ----
# Fixed prompt set; the same list drives ground-truth generation, the chatbot
# queries and the evaluation table, so order matters.
questions = [
    "What are the first aid measures for high fever in infants?",
    "What are the signs and symptoms of low blood sugar?",
    "What does RICE stand for in first aid treatment?",
    "What is the first aid treatment of bleeding?",
    "What is the first aid management of burns?",
    "What are the signs and symptoms of stroke?",
    "What is the treatment of snake bite?",
    "How do you provide first aid for choking?",
    "What are the immediate steps to treat a fainting patient?",
    "What are the First aid measures for taking care of a patient with insect stings and animal bites?",
]
46
-
47
# ---- STEP 2: Generate Ground Truth Responses using ChatGPT ----
llm = ChatOpenAI(model_name="gpt-4", temperature=0)
prompt_template = """
Generate a detailed and accurate first-aid response based on the given context.

### CONTEXT
{context}

### QUESTION
{question}

### RESPONSE
"""
prompt = ChatPromptTemplate.from_template(prompt_template)

# One reference answer per question, each grounded in the top retrieved chunks.
ground_truth_responses = []
for question in questions:
    retrieved_docs = retriever.invoke(question)
    context_text = "\n".join(doc.page_content for doc in retrieved_docs)
    generated_response = llm.invoke(
        prompt.format(context=context_text, question=question)
    )
    # Keep only the message text. Stringifying the whole message object would
    # also embed its metadata fields in the ground truth and distort every
    # metric that compares answers against it.
    ground_truth_responses.append(str(generated_response.content))
68
-
69
# ---- STEP 3: Fetch Responses from Deployed Chatbot ----
print("\n===== Fetching Responses from Chatbot =====")

client = Client("DrSyedFaizan/First_Aid_Assistant")
responses = []

for question in questions:
    try:
        # The second element of the endpoint's result is read as the chat
        # history; the first assistant turn in it is taken as the answer.
        history = client.predict(chatbot=[], message=question, api_name="/respond")[1]
        reply = "[NO RESPONSE]"
        for turn in history:
            if turn["role"] == "assistant":
                reply = turn["content"]
                break
    except Exception as e:
        # Best effort: record the failure in place of an answer and move on.
        reply = f"[ERROR: {e}]"

    responses.append(str(reply))

# Persist the question/answer transcript to a text file.
with open("bot_responses.txt", "w", encoding="utf-8") as f:
    f.writelines(f"Q: {q}\nA: {r}\n\n" for q, r in zip(questions, responses))

# Echo the same transcript to stdout for debugging.
for q, r in zip(questions, responses):
    print(f"Q: {q}\nA: {r}\n")
93
-
94
- # ---- STEP 5: Evaluate Using RAGAS ----
95
- from datasets import Dataset
96
- import pandas as pd
97
- from tqdm import tqdm
98
- from ragas import evaluate
99
- from ragas.metrics import (
100
- answer_relevancy,
101
- faithfulness,
102
- context_recall,
103
- answer_correctness,
104
- answer_similarity
105
- )
106
-
107
def create_ragas_dataset(eval_dataset):
    """Convert the evaluation dataset into the row layout handed to RAGAS.

    Args:
        eval_dataset: Object exposing ``to_pandas()`` whose frame has the
            columns ``question``, ``answer`` and ``ground_truth``.

    Returns:
        A ``Dataset`` with one row per input row and the columns
        ``question``, ``answer``, ``contexts``, ``ground_truths`` and
        ``reference``.
    """
    df = eval_dataset.to_pandas()
    rag_rows = [
        {
            "question": row["question"],
            "answer": row["answer"],
            # This script only captures the chatbot's answer text, not the
            # passages it retrieved, so a fixed placeholder is used here.
            "contexts": ["First aid medical references"],
            "ground_truths": [row["ground_truth"]],
            # The reference must be the ground-truth answer. Filling it from
            # the placeholder context column would make every
            # reference-based metric compare against a constant string.
            "reference": row["ground_truth"],
        }
        for _, row in df.iterrows()
    ]
    return Dataset.from_pandas(pd.DataFrame(rag_rows))
123
-
124
def evaluate_ragas_dataset(ragas_dataset, metrics=None):
    """Run the RAGAS evaluation over ``ragas_dataset``.

    Args:
        ragas_dataset: Dataset passed straight through to ``evaluate``.
        metrics: Optional list of RAGAS metrics. When omitted, the default
            set is used: faithfulness, answer relevancy, context recall,
            answer correctness and answer similarity.

    Returns:
        Whatever ``evaluate`` returns for the given dataset and metrics.

    Raises:
        Exception: Any error raised by ``evaluate`` is printed and then
            re-raised unchanged.
    """
    if metrics is None:
        metrics = [
            faithfulness,
            answer_relevancy,
            context_recall,
            answer_correctness,
            answer_similarity,
        ]
    try:
        return evaluate(ragas_dataset, metrics=metrics)
    except Exception as e:
        print("⚠️ RAGAS Error:", e)
        # Bare raise re-raises the active exception with its traceback intact.
        raise
141
-
142
# Assemble the question / answer / ground-truth table, one row per question.
placeholder_column = ["First aid medical references"] * len(questions)
ground_truth_qac_set = pd.DataFrame(
    {
        "question": questions,
        "answer": responses,
        "context": placeholder_column,
        "ground_truth": list(map(str, ground_truth_responses)),
        "reference": placeholder_column,
    }
)

# Every column is cast to str before the conversion to a Dataset.
eval_dataset = Dataset.from_pandas(ground_truth_qac_set.astype(str))

# Save both the raw table and its RAGAS-formatted counterpart as CSV.
eval_dataset.to_csv("groundtruth_eval_dataset.csv")
basic_qa_ragas_dataset = create_ragas_dataset(eval_dataset)
basic_qa_ragas_dataset.to_csv("basic_qa_ragas_dataset.csv")

# Score the chatbot answers and show the result.
basic_qa_result = evaluate_ragas_dataset(basic_qa_ragas_dataset)

print("\n===== Evaluation Results =====")
print(basic_qa_result)

# Per-row scores as a DataFrame, used later for logging.
evaluation_results = basic_qa_result.to_pandas()
165
-
166
- # Save evaluation results as log
167
-
168
- # ---- STEP 6: Log Results to WandB ----
169
-
170
- import wandb
171
- import pandas as pd
172
-
173
# Convert both `Dataset` objects to pandas DataFrames for export and logging.
eval_df = eval_dataset.to_pandas()
ragas_df = basic_qa_ragas_dataset.to_pandas()

# Save DataFrames as CSV
eval_df.to_csv("groundtruth_eval_dataset.csv", index=False)
ragas_df.to_csv("basic_qa_ragas_dataset.csv", index=False)

# Initialize WandB
wandb.init(
    project="first-aid-tutor",
    entity="drsyedfaizan1987-northeastern-university",
    name="ragas_evaluation",
    notes="Logging evaluation datasets for first-aid chatbot.",
    tags=["first-aid", "evaluation", "ragas"],
)

try:
    # Each table gets its own key. The metrics table was previously logged
    # under "basic_qa_ragas_dataset" as well, colliding with the dataset
    # table logged under that same key.
    wandb.log({"evaluation_results": wandb.Table(dataframe=evaluation_results)})
    wandb.log({"groundtruth_eval_dataset": wandb.Table(dataframe=eval_df)})
    wandb.log({"basic_qa_ragas_dataset": wandb.Table(dataframe=ragas_df)})
finally:
    # Close the run even if logging fails part-way through.
    wandb.finish()