Upload folder using huggingface_hub
Browse files- .gitattributes +4 -0
- .gitignore +7 -0
- .gradio/certificate.pem +31 -0
- .gradio/flagged/Download Processed Excel/9824a6c316a560c7dada/20250117_103737_processed_output.xlsx +0 -0
- .gradio/flagged/Upload Excel File/2aeafc769523b0a05452/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx +3 -0
- .gradio/flagged/Upload Excel File/6c7338224e14819dfdc1/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx +3 -0
- .gradio/flagged/Upload Excel File/fc94c399c41af885dce4/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx +3 -0
- .gradio/flagged/dataset1.csv +60 -0
- README.md +3 -9
- app.py +279 -0
- functions.py +183 -0
- requirements.txt +5 -0
- testing.ipynb +319 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
.gradio/flagged/Upload[[:space:]]Excel[[:space:]]File/2aeafc769523b0a05452/Quick_and_rough_list_for_Ahsan_initial_trials[[:space:]]-[[:space:]]10th[[:space:]]Dec[[:space:]]2024.xlsx filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
.gradio/flagged/Upload[[:space:]]Excel[[:space:]]File/6c7338224e14819dfdc1/Quick_and_rough_list_for_Ahsan_initial_trials[[:space:]]-[[:space:]]10th[[:space:]]Dec[[:space:]]2024.xlsx filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
.gradio/flagged/Upload[[:space:]]Excel[[:space:]]File/fc94c399c41af885dce4/Quick_and_rough_list_for_Ahsan_initial_trials[[:space:]]-[[:space:]]10th[[:space:]]Dec[[:space:]]2024.xlsx filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
data/Quick_and_rough_list_for_Ahsan_initial_trials[[:space:]]-[[:space:]]10th[[:space:]]Dec[[:space:]]2024.xlsx filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
__pycache__
|
| 3 |
+
llama3.2-vision.ipynb
|
| 4 |
+
.DS_Store
|
| 5 |
+
data/*
|
| 6 |
+
__pycache__
|
| 7 |
+
download_app.py
|
.gradio/certificate.pem
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-----BEGIN CERTIFICATE-----
|
| 2 |
+
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
| 3 |
+
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
|
| 4 |
+
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
|
| 5 |
+
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
|
| 6 |
+
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
|
| 7 |
+
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
|
| 8 |
+
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
|
| 9 |
+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
|
| 10 |
+
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
|
| 11 |
+
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
|
| 12 |
+
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
|
| 13 |
+
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
|
| 14 |
+
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
|
| 15 |
+
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
|
| 16 |
+
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
|
| 17 |
+
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
|
| 18 |
+
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
|
| 19 |
+
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
|
| 20 |
+
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
|
| 21 |
+
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
|
| 22 |
+
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
|
| 23 |
+
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
|
| 24 |
+
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
|
| 25 |
+
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
|
| 26 |
+
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
|
| 27 |
+
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
|
| 28 |
+
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
|
| 29 |
+
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
| 30 |
+
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
| 31 |
+
-----END CERTIFICATE-----
|
.gradio/flagged/Download Processed Excel/9824a6c316a560c7dada/20250117_103737_processed_output.xlsx
ADDED
|
Binary file (7.25 kB). View file
|
|
|
.gradio/flagged/Upload Excel File/2aeafc769523b0a05452/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:526098f685db9b7e7223b763e0efa72069f70cff7b9f0a0bb9f334a7fb91fa4e
|
| 3 |
+
size 139903
|
.gradio/flagged/Upload Excel File/6c7338224e14819dfdc1/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0e322cb97a467369a9f86558bf27d9d1db18e48e869c6499af8c8bc76124ea7
|
| 3 |
+
size 122490
|
.gradio/flagged/Upload Excel File/fc94c399c41af885dce4/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:526098f685db9b7e7223b763e0efa72069f70cff7b9f0a0bb9f334a7fb91fa4e
|
| 3 |
+
size 139903
|
.gradio/flagged/dataset1.csv
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Upload Excel File,Enter Prompt,Evaluation Metrics,Download Processed Excel,timestamp
|
| 2 |
+
.gradio/flagged/Upload Excel File/2aeafc769523b0a05452/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx,"You are a teacher of philosophy of science.
|
| 3 |
+
I want you to assess whether the use of the expressions ""to prove"", ""proving"", ""proof"", ""proved"", in the provided passages,
|
| 4 |
+
is appropriate, accurate, and logically consistent from the point of view of the rigorous mathematical meaning of the concept of 'proof'.
|
| 5 |
+
|
| 6 |
+
Consider statements of the type ""X cannot be proved"" or ""there is no proof for X"" as appropriate even when they are applied to something
|
| 7 |
+
for which the concept of 'proof' is not appropriate. Consider questions involving the concept of 'proof' as appropriate even when they are
|
| 8 |
+
applied to something for which the concept of 'proof' is not appropriate. You need to respond with a ""+1"" if the use of those
|
| 9 |
+
expressions is appropriate, accurate, and logically consistent and with a ""-1"" otherwise. Proving something wrong is also acceptable.","Error: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].",,2025-01-17 07:49:32.632581
|
| 10 |
+
.gradio/flagged/Upload Excel File/fc94c399c41af885dce4/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx,"You are a teacher of philosophy of science.
|
| 11 |
+
I want you to assess whether the use of the expressions 'to prove', 'proving', 'proof', 'proved', in the provided passages,
|
| 12 |
+
is appropriate, accurate, and logically consistent from the point of view of the rigorous mathematical meaning of the concept of 'proof'.
|
| 13 |
+
|
| 14 |
+
Consider statements of the type 'X cannot be proved' or 'there is no proof for X' as appropriate even when they are applied to something
|
| 15 |
+
for which the concept of 'proof' is not appropriate. Consider questions involving the concept of 'proof' as appropriate even when they are
|
| 16 |
+
applied to something for which the concept of 'proof' is not appropriate. You need to respond with a '+1' if the use of those
|
| 17 |
+
expressions is appropriate, accurate, and logically consistent and with a '-1' otherwise. Proving something wrong is also acceptable.","'
|
| 18 |
+
<div style=""font-family: Arial, sans-serif;"">
|
| 19 |
+
<h3 style=""color: #2b7a78;"">Evaluation Metrics</h3>
|
| 20 |
+
<p><strong style=""color: #d9534f;"">Accuracy:</strong>
|
| 21 |
+
<span style=""font-size: 1.2em; font-weight: bold; color: #5cb85c;"">0.33</span></p>
|
| 22 |
+
<p><strong>Precision:</strong> N/A</p>
|
| 23 |
+
<p><strong>Recall:</strong> N/A</p>
|
| 24 |
+
<p><strong>F1 Score:</strong> N/A</p>
|
| 25 |
+
<p><strong>Confusion Matrix:</strong><br><table class=""dataframe table table-bordered table-striped"">
|
| 26 |
+
<thead>
|
| 27 |
+
<tr style=""text-align: right;"">
|
| 28 |
+
<th></th>
|
| 29 |
+
<th>Predicted -1.0</th>
|
| 30 |
+
<th>Predicted 0.0</th>
|
| 31 |
+
<th>Predicted 1.0</th>
|
| 32 |
+
</tr>
|
| 33 |
+
</thead>
|
| 34 |
+
<tbody>
|
| 35 |
+
<tr>
|
| 36 |
+
<th>Class -1.0</th>
|
| 37 |
+
<td>3</td>
|
| 38 |
+
<td>0</td>
|
| 39 |
+
<td>0</td>
|
| 40 |
+
</tr>
|
| 41 |
+
<tr>
|
| 42 |
+
<th>Class 0.0</th>
|
| 43 |
+
<td>2</td>
|
| 44 |
+
<td>0</td>
|
| 45 |
+
<td>0</td>
|
| 46 |
+
</tr>
|
| 47 |
+
<tr>
|
| 48 |
+
<th>Class 1.0</th>
|
| 49 |
+
<td>4</td>
|
| 50 |
+
<td>0</td>
|
| 51 |
+
<td>0</td>
|
| 52 |
+
</tr>
|
| 53 |
+
</tbody>
|
| 54 |
+
</table></p>
|
| 55 |
+
</div>
|
| 56 |
+
",.gradio/flagged/Download Processed Excel/9824a6c316a560c7dada/20250117_103737_processed_output.xlsx,2025-01-17 10:41:32.033493
|
| 57 |
+
.gradio/flagged/Upload Excel File/6c7338224e14819dfdc1/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx,"You are a teacher of philosophy of science.
|
| 58 |
+
I want you to assess whether the use of the expressions 'to prove', 'proving', 'proof', 'proved', in the provided passages, is appropriate, accurate, and logically consistent from the point of view of the rigorous mathematical meaning of the concept of 'proof'.
|
| 59 |
+
|
| 60 |
+
Consider statements of the type 'X cannot be proved' or 'there is no proof for X' as appropriate even when they are applied to something for which the concept of 'proof' is not appropriate. Consider questions involving the concept of 'proof' as appropriate even when they are applied to something for which the concept of 'proof' is not appropriate. You need to respond with a '+1' if the use of those expressions is appropriate, accurate, and logically consistent and with a '-1' otherwise. Proving something wrong is also acceptable.",,,2025-01-18 10:29:24.783349
|
README.md
CHANGED
|
@@ -1,12 +1,6 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji: 💻
|
| 4 |
-
colorFrom: red
|
| 5 |
-
colorTo: green
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 5.29.0
|
| 8 |
app_file: app.py
|
| 9 |
-
|
|
|
|
| 10 |
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: kwik-ai
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
app_file: app.py
|
| 4 |
+
sdk: gradio
|
| 5 |
+
sdk_version: 5.12.0
|
| 6 |
---
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv
|
| 2 |
+
load_dotenv()
|
| 3 |
+
import os
|
| 4 |
+
import json
|
| 5 |
+
import glob
|
| 6 |
+
import gradio as gr
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from functions import evaluate_dataframe, run_openai_inference
|
| 10 |
+
import openpyxl
|
| 11 |
+
from openpyxl.styles import Alignment
|
| 12 |
+
import traceback
|
| 13 |
+
|
| 14 |
+
# Ensure the output directory exists before any run artifacts are written.
# exist_ok=True is idempotent and race-free: it replaces the original
# check-then-create pattern, which had a TOCTOU window between the
# os.path.exists() test and the mkdir call.
OUTPUT_DIR = "./data/outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 18 |
+
|
| 19 |
+
# Functions for Analysis Interface
|
| 20 |
+
def process_dataframe(df, prompt: str, model: str, max_iterations: int = 5):
    """Classify up to ``max_iterations`` randomly sampled rows of *df*.

    Builds a 'passage' column from LeftContext/Keyword/RightContext, calls
    ``run_openai_inference`` for each sampled row that has a ground-truth
    'Category', and writes 'Prediction' / 'Prediction Reason' back into the
    frame.

    Note: *df* is mutated in place; the return value is the subset of rows
    that actually received a prediction.

    Args:
        df: DataFrame with 'LeftContext', 'Keyword', 'RightContext' and
            'Category' columns.
        prompt: System prompt forwarded to the model.
        model: Chat model identifier (e.g. "gpt-4o-mini").
        max_iterations: Maximum number of rows to classify.

    Returns:
        DataFrame restricted to rows with a non-null 'Prediction'.
    """
    print("Starting process_dataframe function...")
    # Wrap the keyword in <expression> tags so the model can locate the
    # exact span it must judge.
    df['passage'] = (
        df['LeftContext'].astype(str) +
        " <expression>" + df['Keyword'].astype(str) + "</expression> " +
        df['RightContext'].astype(str)
    )
    # (index, (score, reason)) pairs. The original kept two parallel lists
    # plus a redundant no-op copy ("results = [task for task in tasks]");
    # run_openai_inference is synchronous, so its result is available
    # immediately and can be paired with its row index right away.
    results = []
    print("Iterating over rows in random order...")
    for idx, row in df.sample(frac=1.0).iterrows():
        if len(results) >= max_iterations:
            print(f"Max iterations reached: {max_iterations}")
            break
        # Rows without a ground-truth label cannot be evaluated later.
        if pd.isna(row['Category']):
            print(f"Skipping row {idx} due to missing 'Category'")
            continue
        print(f"Scheduling task for row {idx} with passage: {row['passage']}")
        results.append((idx, run_openai_inference(prompt, row['passage'], model)))
    print("Assigning results to corresponding rows...")
    for idx, (score, reason) in results:
        print(f"Row {idx}: Assigned score: {score}, reason: {reason}")
        df.at[idx, 'Prediction'] = score
        df.at[idx, 'Prediction Reason'] = reason
    print("Dropping rows with missing predictions...")
    df_out = df.dropna(subset=['Prediction'])
    print("Finished processing dataframe.")
    return df_out
|
| 52 |
+
|
| 53 |
+
def _fmt_metric(value):
    """Render a numeric metric as e.g. '0.33', or 'N/A' when it is None."""
    return f"{value:.2f}" if value is not None else "N/A"

def process_excel(file, prompt, model, slider_value):
    """End-to-end pipeline for one uploaded Excel file.

    Reads the workbook, classifies up to ``slider_value`` rows via
    ``process_dataframe``, evaluates predictions against 'Category', writes
    a formatted multi-sheet result workbook plus a JSON metadata sidecar
    (consumed by ``load_history``), and returns the metrics summary.

    Args:
        file: Uploaded file object (Gradio); ``file.name`` is its path.
        prompt: System prompt for the model.
        model: Chat model identifier.
        slider_value: Number of rows to process.

    Returns:
        (html_metrics, output_path) on success, or (html_error, None) when
        anything fails — this is the UI boundary, so the broad except is
        deliberate and the traceback is still printed for the logs.
    """
    try:
        print("Reading Excel file...")
        df = pd.read_excel(file.name)
        print(f"Excel file read successfully. DataFrame shape: {df.shape}")
        print("Processing DataFrame...")
        df_out = process_dataframe(df, prompt, model, max_iterations=slider_value)
        print(f"DataFrame processed. Output shape: {df_out.shape}")
        print("Evaluating DataFrame...")
        evaluation_results = evaluate_dataframe(df_out['Category'], df_out['Prediction'])
        print(f"Evaluation results: {evaluation_results}")
        print("Generating file paths and timestamps...")
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = os.path.join(OUTPUT_DIR, f"{timestamp}_processed_output.xlsx")
        metadata_file = os.path.join(OUTPUT_DIR, f"{timestamp}_metadata.json")
        print(f"Output file path: {output_file}")
        print(f"Metadata file path: {metadata_file}")
        print("Writing Excel file with multiple sheets...")
        with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
            # Sheet 1: the full (mutated) frame, including unclassified rows.
            df.to_excel(writer, sheet_name='Data', index=False)
            # Sheet 2: the run parameters, for reproducibility.
            inputs_df = pd.DataFrame({
                'Parameter': ['Prompt', 'Model', 'Samples Processed'],
                'Value': [prompt, model, slider_value]
            })
            inputs_df.to_excel(writer, sheet_name='Inputs', index=False)
            # Sheet 3: the evaluation summary.
            outputs_df = pd.DataFrame({
                'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'Samples Classified'],
                'Value': [
                    evaluation_results.get('accuracy'),
                    evaluation_results.get('precision'),
                    evaluation_results.get('recall'),
                    evaluation_results.get('f1'),
                    df_out.shape[0]
                ]
            })
            outputs_df.to_excel(writer, sheet_name='Outputs', index=False)
        print(f"Excel file written to: {output_file}")
        print("Applying formatting to Excel file...")
        wb = openpyxl.load_workbook(output_file)
        # Hoisted out of the loops: one shared style object serves every
        # cell (the original allocated a fresh Alignment per cell).
        wrap_style = Alignment(wrap_text=True)
        for sheet in wb.worksheets:
            for row in sheet.iter_rows():
                for cell in row:
                    cell.alignment = wrap_style
            for row_idx in range(1, sheet.max_row + 1):
                sheet.row_dimensions[row_idx].height = 75
        wb.save(output_file)
        print(f"Formatting applied and file saved: {output_file}")
        print("Preparing evaluation results for display...")
        accuracy = _fmt_metric(evaluation_results.get('accuracy'))
        precision = _fmt_metric(evaluation_results.get('precision'))
        recall = _fmt_metric(evaluation_results.get('recall'))
        f1_score = _fmt_metric(evaluation_results.get('f1'))
        conf_matrix = evaluation_results.get('conf_matrix', 'N/A')
        eval_display = f"""
<div style="font-family: Arial, sans-serif;">
    <h3 style="color: #2b7a78;">Evaluation Metrics</h3>
    <p><strong style="color: #d9534f;">Accuracy:</strong>
    <span style="font-size: 1.2em; font-weight: bold; color: #5cb85c;">{accuracy}</span></p>
    <p><strong>Precision:</strong> {precision}</p>
    <p><strong>Recall:</strong> {recall}</p>
    <p><strong>F1 Score:</strong> {f1_score}</p>
    <p><strong>Confusion Matrix:</strong><br>{conf_matrix}</p>
</div>
"""
        print(f"Evaluation results prepared for display.")
        print("Saving run metadata...")
        history_data = {
            'date_of_run': timestamp,
            'input_filename': os.path.basename(file.name),
            'output_filename': os.path.basename(output_file),
            'prompt': prompt,
            'model': model,
            'samples_requested': slider_value,
            'samples_classified': df_out.shape[0],
            'accuracy': evaluation_results.get('accuracy')
        }
        with open(metadata_file, "w") as f:
            json.dump(history_data, f)
        print(f"Run metadata saved to: {metadata_file}")
        return eval_display, output_file
    except Exception as e:
        error_message = f"<div style='color: red;'>Error: {str(e)}</div>"
        traceback.print_exc()
        return error_message, None
|
| 137 |
+
|
| 138 |
+
def load_history():
    """Build an HTML table of past runs from *_metadata.json sidecars.

    Scans OUTPUT_DIR (newest first) and renders one row per run, with the
    prompt clipped to 100 characters (full text in the hover tooltip) and a
    download link for the output workbook. Unreadable metadata files are
    logged and skipped rather than failing the whole table.

    Returns:
        HTML string for the history table, or a placeholder paragraph when
        no runs have been recorded.
    """
    import html as _html  # local import: only needed for attribute escaping

    history_entries = []
    for meta_file in sorted(glob.glob(os.path.join(OUTPUT_DIR, "*_metadata.json")), reverse=True):
        try:
            with open(meta_file, "r") as f:
                data = json.load(f)
            excel_filename = data.get('output_filename', '')
            download_link = f"<a href='{os.path.join(OUTPUT_DIR, excel_filename)}' download>{excel_filename}</a>"
            history_entries.append({
                "Date": data.get("date_of_run", ""),
                "Input Filename": data.get("input_filename", ""),
                "Output Filename": excel_filename,
                "Prompt": data.get("prompt", ""),
                "Model": data.get("model", ""),
                "Samples Requested": data.get("samples_requested", ""),
                "Samples Classified": data.get("samples_classified", ""),
                "Accuracy": f"{data.get('accuracy'):.2f}" if data.get('accuracy') is not None else "N/A",
                "Download": download_link
            })
        except Exception as ex:
            print(f"Error loading history from {meta_file}: {ex}")
    if history_entries:
        table_html = """
        <style>
        .history-table {
            border-collapse: collapse;
            width: 100%;
            font-family: Arial, sans-serif;
        }
        .history-table th, .history-table td {
            padding: 8px;
            text-align: left;
            border: 1px solid #ddd;
        }
        .history-table th.prompt-col, .history-table td.prompt-col {
            min-width: 300px;
        }
        .history-table td span {
            cursor: help;
        }
        </style>
        <table class='history-table'>
        <tr>
        """
        for col in history_entries[0].keys():
            if col == "Prompt":
                table_html += f"<th class='prompt-col'>{col}</th>"
            else:
                table_html += f"<th>{col}</th>"
        table_html += "</tr>"
        for entry in history_entries:
            table_html += "<tr>"
            for key, value in entry.items():
                if key == "Prompt":
                    full_prompt = value
                    clipped = full_prompt if len(full_prompt) <= 100 else full_prompt[:100] + "..."
                    # Bug fix: prompts routinely contain single quotes
                    # (e.g. 'proof'), which previously terminated the
                    # title='...' attribute and corrupted the markup.
                    # quote=True escapes both ' and ".
                    safe_title = _html.escape(full_prompt, quote=True)
                    table_html += f"<td class='prompt-col'><span title='{safe_title}'>{_html.escape(clipped)}</span></td>"
                else:
                    table_html += f"<td>{value}</td>"
            table_html += "</tr>"
        table_html += "</table>"
    else:
        table_html = "<p>No history available.</p>"
    return table_html
|
| 202 |
+
|
| 203 |
+
# Functions for Other Interface
|
| 204 |
+
def classify_text(text, model):
    """Placeholder function for text classification."""
    return "Classified as: {} - {}".format(model, text)
|
| 207 |
+
|
| 208 |
+
# Interface Creation Functions
|
| 209 |
+
def create_analysis_interface():
    """Build the analysis Gradio app: file upload, prompt, run button,
    metrics display, processed-file download, and a refreshable history
    table."""
    # Default system prompt pre-filled in the textbox.
    default_prompt = (
        "You are a teacher of philosophy of science. \n"
        "I want you to assess whether the use of the expressions 'to prove', 'proving', 'proof', 'proved', in the provided passages, \n"
        "is appropriate, accurate, and logically consistent from the point of view of the rigorous mathematical meaning of the concept of 'proof'. \n\n"
        "Consider statements of the type 'X cannot be proved' or 'there is no proof for X' as appropriate even when they are applied to something \n"
        "for which the concept of 'proof' is not appropriate. Consider questions involving the concept of 'proof' as appropriate even when they are \n"
        "applied to something for which the concept of 'proof' is not appropriate. You need to respond with a '+1' if the use of those \n"
        "expressions is appropriate, accurate, and logically consistent and with a '-1' otherwise. Proving something wrong is also acceptable."
    )
    # NOTE: component creation order defines the on-screen layout, so it is
    # kept exactly as before.
    with gr.Blocks() as demo:
        gr.Markdown("# AI Text Analysis with History")
        with gr.Row():
            upload_box = gr.File(label="Upload Excel File")
            prompt_box = gr.Textbox(label="Enter Prompt", placeholder="Type something here...", value=default_prompt, lines=10)
        with gr.Row():
            model_choice = gr.Dropdown(label="Select Model", choices=["gpt-4o", "gpt-4o-mini"], value="gpt-4o-mini")
            row_count = gr.Slider(label="Number of Rows to Process", minimum=5, maximum=100, step=5, value=5)
        run_button = gr.Button("Process File")
        metrics_html = gr.HTML(label="Evaluation Metrics")
        result_file = gr.File(label="Download Processed Excel")
        # Wire the run button to the end-to-end pipeline.
        run_button.click(
            fn=process_excel,
            inputs=[upload_box, prompt_box, model_choice, row_count],
            outputs=[metrics_html, result_file],
        )
        gr.Markdown("## History")
        refresh_button = gr.Button("Refresh History")
        history_html = gr.HTML(label="History Table")
        refresh_button.click(fn=load_history, inputs=[], outputs=history_html)
    return demo
|
| 236 |
+
|
| 237 |
+
def create_inference_interface():
    """Build the inference Gradio app. Identical layout and wiring to the
    analysis interface; only the page title differs."""
    # Default system prompt pre-filled in the textbox.
    default_prompt = (
        "You are a teacher of philosophy of science. \n"
        "I want you to assess whether the use of the expressions 'to prove', 'proving', 'proof', 'proved', in the provided passages, \n"
        "is appropriate, accurate, and logically consistent from the point of view of the rigorous mathematical meaning of the concept of 'proof'. \n\n"
        "Consider statements of the type 'X cannot be proved' or 'there is no proof for X' as appropriate even when they are applied to something \n"
        "for which the concept of 'proof' is not appropriate. Consider questions involving the concept of 'proof' as appropriate even when they are \n"
        "applied to something for which the concept of 'proof' is not appropriate. You need to respond with a '+1' if the use of those \n"
        "expressions is appropriate, accurate, and logically consistent and with a '-1' otherwise. Proving something wrong is also acceptable."
    )
    # NOTE: component creation order defines the on-screen layout, so it is
    # kept exactly as before.
    with gr.Blocks() as demo:
        gr.Markdown("# AI Text Inference Interface")
        with gr.Row():
            upload_box = gr.File(label="Upload Excel File")
            prompt_box = gr.Textbox(label="Enter Prompt", placeholder="Type something here...", value=default_prompt, lines=10)
        with gr.Row():
            model_choice = gr.Dropdown(label="Select Model", choices=["gpt-4o", "gpt-4o-mini"], value="gpt-4o-mini")
            row_count = gr.Slider(label="Number of Rows to Process", minimum=5, maximum=100, step=5, value=5)
        run_button = gr.Button("Process File")
        metrics_html = gr.HTML(label="Evaluation Metrics")
        result_file = gr.File(label="Download Processed Excel")
        run_button.click(
            fn=process_excel,
            inputs=[upload_box, prompt_box, model_choice, row_count],
            outputs=[metrics_html, result_file],
        )
        gr.Markdown("## History")
        refresh_button = gr.Button("Refresh History")
        history_html = gr.HTML(label="History Table")
        refresh_button.click(fn=load_history, inputs=[], outputs=history_html)
    return demo
|
| 264 |
+
|
| 265 |
+
# Main Execution
|
| 266 |
+
# Main Execution
if __name__ == "__main__":
    # Default to "analysis" if not set. Bug fix: the original read the
    # variable with no default and then raised ValueError when it was
    # unset, contradicting its own comment.
    interface_type = os.getenv("INTERFACE_TYPE", "analysis")
    if interface_type == "analysis":
        # Serve the outputs folder statically so history download links
        # in the rendered table resolve.
        static_paths = [OUTPUT_DIR]
        gr.set_static_paths(paths=static_paths)
        print(f"static files will be served from folder: {static_paths}")
        demo = create_analysis_interface()
    elif interface_type == "inference":
        demo = create_inference_interface()
    else:
        raise ValueError(f"Invalid INTERFACE_TYPE: {interface_type}")
    demo.launch(share=True)
|
functions.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dotenv import load_dotenv
|
| 2 |
+
load_dotenv()
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import json
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
import openpyxl
|
| 9 |
+
from openpyxl.utils import get_column_letter
|
| 10 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
|
| 11 |
+
|
| 12 |
+
# Set up global paths and history file.
OUTPUT_DIR = "./data/outputs"
HISTORY_FILE = "./data/history.json"
# makedirs creates intermediate directories too, so "./data" is guaranteed
# to exist after this call -- the original repeated an explicit check for
# the parent directory, which was redundant.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load history if exists, otherwise initialize an empty dictionary.
if os.path.exists(HISTORY_FILE):
    with open(HISTORY_FILE, "r") as f:
        history = json.load(f)
else:
    history = {}

# Import the OpenAI library. The module object itself is used as the
# client (legacy-style usage); presumably it picks up credentials from the
# environment loaded by dotenv -- TODO confirm against deployment config.
import openai
client = openai
|
| 29 |
+
|
| 30 |
+
def run_openai_inference(prompt: str, passage: str, model: str):
    """Classify one passage with a chat model.

    Sends *prompt* plus the <passage>-wrapped text as the system message
    and an output-format instruction as the user message, then parses the
    model's JSON-like reply.

    Args:
        prompt: Task instructions (system prompt).
        passage: Text to analyze.
        model: Chat model name (e.g. "gpt-4o-mini").

    Returns:
        (score, reason) with score an int, or (None, None) when the reply
        cannot be parsed.
    """
    passage_prompt = f"""
Here is the passage you need to analyze:
<passage>
{passage}
</passage>
"""

    system_prompt = f"{prompt}\n\n{passage_prompt}"

    # Reply-format contract, kept as the user message so task instructions
    # and output schema stay separate.
    format = """
Based on the identified type, extract and return the following data:
- score
**Output format:**
{ "score": "return numeric score here", "reason": "return a short one liner reason for your score here" }
"""

    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": format},
        ]
    )

    content = completion.choices[0].message.content
    # Parse the reply exactly once. Security/correctness fix: the original
    # called eval() on raw model output (arbitrary code execution) and
    # evaluated the same string twice. json.loads covers well-formed JSON;
    # ast.literal_eval safely covers Python-literal dicts (single quotes)
    # without executing code.
    try:
        import ast
        try:
            parsed = json.loads(content)
        except ValueError:
            parsed = ast.literal_eval(content)
        score = int(parsed['score'])
        reason = parsed['reason']
    except Exception:
        # Unparseable reply: signal "no prediction" so the caller drops
        # the row, matching the original fallback behavior.
        score = None
        reason = None

    return score, reason
|
| 69 |
+
|
| 70 |
+
def process_dataframe(df, prompt: str, model: str, max_iterations: int = 5):
    """Run model inference over up to max_iterations labeled rows of df.

    Builds a 'passage' column from LeftContext/Keyword/RightContext, visits
    rows in random order, skips rows with no ground-truth 'Category', and
    writes 'Prediction' / 'Prediction Reason' columns in place.

    Args:
        df: Input DataFrame with LeftContext, Keyword, RightContext,
            Category columns (mutated in place).
        prompt: System prompt forwarded to run_openai_inference.
        model: Chat model name forwarded to run_openai_inference.
        max_iterations: Maximum number of rows to score.

    Returns:
        The subset of df whose rows received a prediction.
    """
    print("Starting process_dataframe function...")

    # Combine the context columns around the keyword, marking the keyword
    # with <expression> tags so the model knows which term to assess.
    print("Creating 'passage' column...")
    df['passage'] = (
        df['LeftContext'].astype(str) +
        " <expression>" + df['Keyword'].astype(str) + "</expression> " +
        df['RightContext'].astype(str)
    )

    # run_openai_inference is synchronous, so score each row as we go.
    # (The previous tasks/results indirection suggested concurrency that
    # did not exist -- the "tasks" list already held computed results.)
    processed = 0
    print("Iterating over rows in random order...")
    for idx, row in df.sample(frac=1.0).iterrows():
        if processed >= max_iterations:
            print(f"Max iterations reached: {max_iterations}")
            break
        if pd.isna(row['Category']):
            print(f"Skipping row {idx} due to missing 'Category'")
            continue
        print(f"Scheduling task for row {idx} with passage: {row['passage']}")
        score, reason = run_openai_inference(prompt, row['passage'], model)
        print(f"Row {idx}: Assigned score: {score}, reason: {reason}")
        df.at[idx, 'Prediction'] = score
        df.at[idx, 'Prediction Reason'] = reason
        processed += 1

    # If no row was eligible, the Prediction column was never created and
    # dropna(subset=...) would raise KeyError -- create empty columns so an
    # empty result is returned instead.
    if 'Prediction' not in df.columns:
        df['Prediction'] = None
        df['Prediction Reason'] = None

    # Keep only rows that actually received a prediction.
    print("Dropping rows with missing predictions...")
    df_out = df.dropna(subset=['Prediction'])

    print("Finished processing dataframe.")
    return df_out
def evaluate_dataframe(y_true, y_pred):
    """Compute classification metrics, returning None for any metric that fails.

    Each metric is guarded independently so that e.g. an ill-defined
    precision (single-class input) does not prevent the other metrics from
    being reported. `average='binary'` assumes a two-class problem.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        Dict with 'accuracy', 'precision', 'recall', 'f1', 'conf_matrix'
        (nested lists for JSON serialization); failed metrics are None.
    """
    def _safe(compute):
        # Guard one metric computation. The previous bare `except:` was
        # narrowed to Exception so KeyboardInterrupt/SystemExit propagate.
        try:
            return compute()
        except Exception:
            return None

    return {
        "accuracy": _safe(lambda: accuracy_score(y_true, y_pred)),
        "precision": _safe(lambda: precision_score(y_true, y_pred, average='binary')),
        "recall": _safe(lambda: recall_score(y_true, y_pred, average='binary')),
        "f1": _safe(lambda: f1_score(y_true, y_pred, average='binary')),
        # tolist(): convert ndarray to nested lists for JSON serialization.
        "conf_matrix": _safe(lambda: confusion_matrix(y_true, y_pred).tolist()),
    }
+
def save_results(df_out, prompt, model):
    """Write results and run inputs to a timestamped Excel file and log the run.

    Creates <OUTPUT_DIR>/<timestamp>_processed.xlsx with an "Outputs" sheet
    (the scored DataFrame) and an "Inputs" sheet (prompt + model), widens the
    columns to fit their contents, appends the run to the global `history`
    dict, and persists it to HISTORY_FILE.

    Returns:
        The path of the written Excel file.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = os.path.join(OUTPUT_DIR, f"{timestamp}_processed.xlsx")

    # Record the prompt/model used for this run alongside the outputs.
    df_prompt = pd.DataFrame({"Prompt": [prompt], "Model": [model]})
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        df_out.to_excel(writer, sheet_name="Outputs", index=False)
        df_prompt.to_excel(writer, sheet_name="Inputs", index=False)
    # The context manager saves the workbook on exit; no explicit save().

    # Re-open with openpyxl to auto-fit column widths on both sheets.
    wb = openpyxl.load_workbook(output_file)
    for sheet_name, sheet_df in [("Outputs", df_out), ("Inputs", df_prompt)]:
        ws = wb[sheet_name]
        for col_idx, col in enumerate(sheet_df.columns, 1):
            # Width = longest cell (or the header, if longer) plus padding.
            longest = max((len(str(cell)) for cell in sheet_df[col].values), default=0)
            width = max(longest, len(col)) + 2
            ws.column_dimensions[get_column_letter(col_idx)].width = width
    wb.save(output_file)

    # Update the in-memory history with this run's details and persist it.
    history[timestamp] = {
        "file": output_file,
        "prompt": prompt,
        "model": model,
        "score": df_out['Prediction'].mean() if not df_out['Prediction'].empty else None,
        "samples": len(df_out)
    }
    with open(HISTORY_FILE, "w") as f:
        json.dump(history, f, indent=4)
    return output_file
|
| 181 |
+
def list_previous_files():
    """Return the run history (timestamp -> run metadata) as a dictionary."""
    return history
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
python-dotenv==1.0.1
|
| 2 |
+
pandas==2.2.3
|
| 3 |
+
scikit-learn==1.6.1
|
| 4 |
+
openai==1.59.7
|
| 5 |
+
openpyxl==3.1.5
|
testing.ipynb
ADDED
|
@@ -0,0 +1,319 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 2,
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"outputs": [],
|
| 8 |
+
"source": [
|
| 9 |
+
"import pandas as pd\n",
|
| 10 |
+
"filename = \"./data/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx\"\n",
|
| 11 |
+
"df = pd.read_excel(filename)"
|
| 12 |
+
]
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"execution_count": 13,
|
| 17 |
+
"metadata": {},
|
| 18 |
+
"outputs": [
|
| 19 |
+
{
|
| 20 |
+
"data": {
|
| 21 |
+
"text/plain": [
|
| 22 |
+
"-1"
|
| 23 |
+
]
|
| 24 |
+
},
|
| 25 |
+
"execution_count": 13,
|
| 26 |
+
"metadata": {},
|
| 27 |
+
"output_type": "execute_result"
|
| 28 |
+
}
|
| 29 |
+
],
|
| 30 |
+
"source": []
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"cell_type": "code",
|
| 34 |
+
"execution_count": 1,
|
| 35 |
+
"metadata": {},
|
| 36 |
+
"outputs": [],
|
| 37 |
+
"source": [
|
| 38 |
+
"import os\n",
|
| 39 |
+
"os.environ['OPENAI_API_KEY'] = 'REDACTED'  # SECURITY: a live API key was committed in this cell; revoke it immediately and load the key from the environment or a .env file instead"
|
| 40 |
+
]
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"cell_type": "code",
|
| 44 |
+
"execution_count": 2,
|
| 45 |
+
"metadata": {},
|
| 46 |
+
"outputs": [],
|
| 47 |
+
"source": [
|
| 48 |
+
"from openai import OpenAI\n",
|
| 49 |
+
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report\n",
|
| 50 |
+
"import pandas as pd\n",
|
| 51 |
+
"from tqdm import tqdm"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "code",
|
| 56 |
+
"execution_count": 32,
|
| 57 |
+
"metadata": {},
|
| 58 |
+
"outputs": [
|
| 59 |
+
{
|
| 60 |
+
"name": "stderr",
|
| 61 |
+
"output_type": "stream",
|
| 62 |
+
"text": [
|
| 63 |
+
"Computing AI Outputs: 1%| | 5/842 [00:01<05:16, 2.65it/s]"
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"name": "stdout",
|
| 68 |
+
"output_type": "stream",
|
| 69 |
+
"text": [
|
| 70 |
+
"passage: or how we can stand on this earth. Science is just one of the methods which can help us to <expression>prove </expression> that our ideas are correct and true. Accordingly, it is dangerous to rely on science to decide everything. Also, \n",
|
| 71 |
+
"gtruth: -1.0, pred: -1, pred_reason: The use of 'prove' in this context is not rigorous or appropriate, as science typically deals with empirical evidence rather than mathematical proof.\n",
|
| 72 |
+
"\n"
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"name": "stderr",
|
| 77 |
+
"output_type": "stream",
|
| 78 |
+
"text": [
|
| 79 |
+
"Computing AI Outputs: 2%|▏ | 15/842 [00:06<06:16, 2.20it/s]"
|
| 80 |
+
]
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"name": "stdout",
|
| 84 |
+
"output_type": "stream",
|
| 85 |
+
"text": [
|
| 86 |
+
"passage: culture, scientific facts could be repeated infinite times. Take Galileo’s famous experiment on falling bodies as an example, to <expression>prove </expression> that the rate of a falling body is independent from its mess, Galileo dropped two cannonballs of different weights \n",
|
| 87 |
+
"gtruth: -1.0, pred: -1, pred_reason: The term 'prove' is used incorrectly as scientific experiments can support theories but do not provide definitive proof in the mathematical sense.\n",
|
| 88 |
+
"\n"
|
| 89 |
+
]
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"name": "stderr",
|
| 93 |
+
"output_type": "stream",
|
| 94 |
+
"text": [
|
| 95 |
+
"Computing AI Outputs: 2%|▏ | 16/842 [00:08<07:36, 1.81it/s]"
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"name": "stdout",
|
| 100 |
+
"output_type": "stream",
|
| 101 |
+
"text": [
|
| 102 |
+
"passage: seen as a rational, impartial, neutral bystander when facing social issues. It poses to us facts and numerical evidence to <expression>prove </expression> hypotheses and clear guesses or stereotypes. But is sticking to science enough? Scientific advancements have profoundly transformed our society, \n",
|
| 103 |
+
"gtruth: -1.0, pred: -1, pred_reason: The use of 'prove' in this context is inappropriate and inaccurate as it suggests a definitive proof typical of mathematics, whereas scientific hypotheses are generally subject to revision and do not reach absolute proof.\n",
|
| 104 |
+
"\n"
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"name": "stderr",
|
| 109 |
+
"output_type": "stream",
|
| 110 |
+
"text": [
|
| 111 |
+
"Computing AI Outputs: 2%|▏ | 19/842 [00:09<07:04, 1.94it/s]"
|
| 112 |
+
]
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"name": "stdout",
|
| 116 |
+
"output_type": "stream",
|
| 117 |
+
"text": [
|
| 118 |
+
"passage: pursuing scientific progress and maintaining the humanity and integrity of the law. Meanwhile, as the rapid development of science may <expression>prove </expression> that some values of society, even those preserved in law, are plainly wrong, the legal education must also highlight \n",
|
| 119 |
+
"gtruth: -1.0, pred: -1, pred_reason: The term 'prove' is misapplied as it implies a mathematical certainty rather than a scientific or ethical assertion.\n",
|
| 120 |
+
"\n"
|
| 121 |
+
]
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"name": "stderr",
|
| 125 |
+
"output_type": "stream",
|
| 126 |
+
"text": [
|
| 127 |
+
"Computing AI Outputs: 3%|▎ | 26/842 [00:10<05:30, 2.47it/s]"
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
{
|
| 131 |
+
"name": "stdout",
|
| 132 |
+
"output_type": "stream",
|
| 133 |
+
"text": [
|
| 134 |
+
"passage: an accurate result that no one can deny. To provide that result, one should follow biological gender since it is <expression>proved </expression> by science. However, gender identity can be hardly proved, and it can be different from the perspectives of individuals. \n",
|
| 135 |
+
"gtruth: -1.0, pred: -1, pred_reason: The use of 'proved' in relation to biological gender is misleading as it implies a mathematical proof rather than scientific evidence.\n",
|
| 136 |
+
"\n"
|
| 137 |
+
]
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"name": "stderr",
|
| 141 |
+
"output_type": "stream",
|
| 142 |
+
"text": [
|
| 143 |
+
"\n",
|
| 144 |
+
"/Users/ahsan/opt/miniconda3/envs/kwik-ai/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
| 145 |
+
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
| 146 |
+
"/Users/ahsan/opt/miniconda3/envs/kwik-ai/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. Use `zero_division` parameter to control this behavior.\n",
|
| 147 |
+
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
| 148 |
+
"/Users/ahsan/opt/miniconda3/envs/kwik-ai/lib/python3.12/site-packages/sklearn/metrics/_classification.py:1565: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no true nor predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
| 149 |
+
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
|
| 150 |
+
"/Users/ahsan/opt/miniconda3/envs/kwik-ai/lib/python3.12/site-packages/sklearn/metrics/_classification.py:407: UserWarning: A single label was found in 'y_true' and 'y_pred'. For the confusion matrix to have the correct shape, use the 'labels' parameter to pass all known labels.\n",
|
| 151 |
+
" warnings.warn(\n"
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"data": {
|
| 156 |
+
"text/plain": [
|
| 157 |
+
"{'accuracy': 1.0,\n",
|
| 158 |
+
" 'precision': 0.0,\n",
|
| 159 |
+
" 'recall': 0.0,\n",
|
| 160 |
+
" 'f1': 0.0,\n",
|
| 161 |
+
" 'conf_matrix': array([[4]])}"
|
| 162 |
+
]
|
| 163 |
+
},
|
| 164 |
+
"execution_count": 32,
|
| 165 |
+
"metadata": {},
|
| 166 |
+
"output_type": "execute_result"
|
| 167 |
+
}
|
| 168 |
+
],
|
| 169 |
+
"source": [
|
| 170 |
+
"client = OpenAI()\n",
|
| 171 |
+
"\n",
|
| 172 |
+
"def run_openai_inference(prompt: str, passage: str):\n",
|
| 173 |
+
" passage_prompt = f\"\"\"\n",
|
| 174 |
+
" Here is the passage you need to analyze:\n",
|
| 175 |
+
" <passage>\n",
|
| 176 |
+
" {passage}\n",
|
| 177 |
+
" </passage>\n",
|
| 178 |
+
"\n",
|
| 179 |
+
" \"\"\"\n",
|
| 180 |
+
"\n",
|
| 181 |
+
"\n",
|
| 182 |
+
" system_prompt = f\"{prompt}\\n\\n{passage_prompt}\"\n",
|
| 183 |
+
"\n",
|
| 184 |
+
" # print(f\"passage_prompt: {system_prompt}\", end=\"\\n\\n\")\n",
|
| 185 |
+
"\n",
|
| 186 |
+
" format = \"\"\"\n",
|
| 187 |
+
" Based on the identified type, extract and return the following data:\n",
|
| 188 |
+
" - score\n",
|
| 189 |
+
"\n",
|
| 190 |
+
" **Output format:**\n",
|
| 191 |
+
" { \"score\": \"return numeric score here\", \"reason\": \"return a short one liner reason for your score here\" }\n",
|
| 192 |
+
" \"\"\"\n",
|
| 193 |
+
"\n",
|
| 194 |
+
" # print(f\"system prompt is: {system_prompt}\")\n",
|
| 195 |
+
"\n",
|
| 196 |
+
" completion = client.chat.completions.create(\n",
|
| 197 |
+
" model=\"gpt-4o-mini\",\n",
|
| 198 |
+
" messages=[\n",
|
| 199 |
+
" {\"role\": \"system\", \"content\": system_prompt},\n",
|
| 200 |
+
" {\"role\": \"user\", \"content\": format},\n",
|
| 201 |
+
" ]\n",
|
| 202 |
+
" )\n",
|
| 203 |
+
"\n",
|
| 204 |
+
" # print(completion.choices[0].message.content)\n",
|
| 205 |
+
" try:\n",
|
| 206 |
+
" score = int(eval(completion.choices[0].message.content)['score'])\n",
|
| 207 |
+
" reason = eval(completion.choices[0].message.content)['reason']\n",
|
| 208 |
+
" except:\n",
|
| 209 |
+
" score = None\n",
|
| 210 |
+
" reason = None\n",
|
| 211 |
+
"\n",
|
| 212 |
+
" return score, reason\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"# Function to process DataFrame and compute predictions\n",
|
| 215 |
+
"def process_dataframe(df, prompt: str, max_iterations: int = 5):\n",
|
| 216 |
+
" df['passage'] = df['LeftContext'] + \" <expression>\" + df['Keyword'] + \"</expression> \" + df['RightContext']\n",
|
| 217 |
+
"\n",
|
| 218 |
+
" i = 0\n",
|
| 219 |
+
" for idx, row in tqdm(df.sample(frac=1.0).iterrows(), total=df.shape[0], desc='Computing AI Outputs'): # randomizing the df\n",
|
| 220 |
+
" try:\n",
|
| 221 |
+
" gtruth = row['Category']\n",
|
| 222 |
+
"\n",
|
| 223 |
+
" # Check if ground truth is NaN\n",
|
| 224 |
+
" if pd.isna(gtruth):\n",
|
| 225 |
+
" raise ValueError('skipping')\n",
|
| 226 |
+
" except:\n",
|
| 227 |
+
" continue\n",
|
| 228 |
+
"\n",
|
| 229 |
+
" pred_score, pred_reason = run_openai_inference(prompt, row['passage'])\n",
|
| 230 |
+
" print(f\"passage: {row['passage']}\")\n",
|
| 231 |
+
" print(f\"gtruth: {int(gtruth)}, pred: {pred_score}, pred_reason: {pred_reason}\", end=\"\\n\\n\")\n",
|
| 232 |
+
" i += 1\n",
|
| 233 |
+
"\n",
|
| 234 |
+
" if i == max_iterations:\n",
|
| 235 |
+
" break\n",
|
| 236 |
+
"\n",
|
| 237 |
+
" df.loc[idx, 'Prediction'] = pred_score\n",
|
| 238 |
+
" df.loc[idx, 'Prediction Reason'] = pred_reason\n",
|
| 239 |
+
"\n",
|
| 240 |
+
" df_out = df.dropna(subset=['Prediction'])\n",
|
| 241 |
+
" return df_out\n",
|
| 242 |
+
"\n",
|
| 243 |
+
"# Ground truth and predictions\n",
|
| 244 |
+
"def evaluate_dataframe(y_true, y_pred):\n",
|
| 245 |
+
" # Compute metrics\n",
|
| 246 |
+
" try:\n",
|
| 247 |
+
" accuracy = accuracy_score(y_true, y_pred)\n",
|
| 248 |
+
" except:\n",
|
| 249 |
+
" accuracy = None\n",
|
| 250 |
+
" \n",
|
| 251 |
+
" try:\n",
|
| 252 |
+
" precision = precision_score(y_true, y_pred, average='binary') # Use 'macro', 'micro', or 'weighted' for multiclass\n",
|
| 253 |
+
" except:\n",
|
| 254 |
+
" precision = None\n",
|
| 255 |
+
"\n",
|
| 256 |
+
" try:\n",
|
| 257 |
+
" recall = recall_score(y_true, y_pred, average='binary')\n",
|
| 258 |
+
" except:\n",
|
| 259 |
+
" recall = None\n",
|
| 260 |
+
"\n",
|
| 261 |
+
" try:\n",
|
| 262 |
+
" f1 = f1_score(y_true, y_pred, average='binary')\n",
|
| 263 |
+
" except:\n",
|
| 264 |
+
" f1 = None\n",
|
| 265 |
+
"\n",
|
| 266 |
+
" try:\n",
|
| 267 |
+
" conf_matrix = confusion_matrix(y_true, y_pred)\n",
|
| 268 |
+
" except:\n",
|
| 269 |
+
" conf_matrix = None\n",
|
| 270 |
+
" \n",
|
| 271 |
+
" return {\n",
|
| 272 |
+
" \"accuracy\": accuracy,\n",
|
| 273 |
+
" \"precision\": precision,\n",
|
| 274 |
+
" \"recall\": recall,\n",
|
| 275 |
+
" \"f1\": f1,\n",
|
| 276 |
+
" \"conf_matrix\": conf_matrix\n",
|
| 277 |
+
" }\n",
|
| 278 |
+
"\n",
|
| 279 |
+
"\n",
|
| 280 |
+
"prompt = f\"\"\"\n",
|
| 281 |
+
"You are a teacher of philosophy of science. \n",
|
| 282 |
+
"I want you to assess whether the use of the expressions \"to prove\", \"proving\", \"proof\", \"proved\", in the provided passages, \n",
|
| 283 |
+
"is appropriate, accurate, and logically consistent from the point of view of the rigorous mathematical meaning of the concept of 'proof'. \n",
|
| 284 |
+
"\n",
|
| 285 |
+
"Consider statements of the type \"X cannot be proved\" or \"there is no proof for X\" as appropriate even when they are applied to something \n",
|
| 286 |
+
"for which the concept of 'proof' is not appropriate. Consider questions involving the concept of 'proof' as appropriate even when they are \n",
|
| 287 |
+
"applied to something for which the concept of 'proof' is not appropriate. You need to respond with a \"+1\" if the use of those \n",
|
| 288 |
+
"expressions is appropriate, accurate, and logically consistent and with a \"-1\" otherwise. Proving something wrong is also acceptable.\n",
|
| 289 |
+
"\"\"\"\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"df = pd.read_excel(\"./data/Quick_and_rough_list_for_Ahsan_initial_trials - 10th Dec 2024.xlsx\", sheet_name='Data')\n",
|
| 292 |
+
"\n",
|
| 293 |
+
"df_out = process_dataframe(df, prompt, max_iterations=5)\n",
|
| 294 |
+
"evaluate_dataframe(df_out['Category'], df_out['Prediction'])"
|
| 295 |
+
]
|
| 296 |
+
}
|
| 297 |
+
],
|
| 298 |
+
"metadata": {
|
| 299 |
+
"kernelspec": {
|
| 300 |
+
"display_name": "kwik-ai",
|
| 301 |
+
"language": "python",
|
| 302 |
+
"name": "python3"
|
| 303 |
+
},
|
| 304 |
+
"language_info": {
|
| 305 |
+
"codemirror_mode": {
|
| 306 |
+
"name": "ipython",
|
| 307 |
+
"version": 3
|
| 308 |
+
},
|
| 309 |
+
"file_extension": ".py",
|
| 310 |
+
"mimetype": "text/x-python",
|
| 311 |
+
"name": "python",
|
| 312 |
+
"nbconvert_exporter": "python",
|
| 313 |
+
"pygments_lexer": "ipython3",
|
| 314 |
+
"version": "3.12.8"
|
| 315 |
+
}
|
| 316 |
+
},
|
| 317 |
+
"nbformat": 4,
|
| 318 |
+
"nbformat_minor": 2
|
| 319 |
+
}
|