Spaces:
Running
Running
= committed on
Commit ·
ad36ee4
1
Parent(s): fa1c690
up works
Browse files- OCNotebook.jl +131 -10
- utils.jl +38 -0
OCNotebook.jl
CHANGED
|
@@ -31,17 +31,69 @@ md" *A Misinformation Remediation Company*"
|
|
| 31 |
# βββ‘ 86b197ec-84f7-4fc1-9abf-e4244e853ce7
|
| 32 |
begin
|
| 33 |
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
html_rows = String[]
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
| 40 |
# Data rows
|
| 41 |
for row in eachrow(df)
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
end
|
| 44 |
|
|
|
|
| 45 |
table_html = """
|
| 46 |
<style>
|
| 47 |
table.pretty-table {
|
|
@@ -49,7 +101,6 @@ begin
|
|
| 49 |
width: 100%;
|
| 50 |
font-family: system-ui, sans-serif;
|
| 51 |
margin-top: 10px;
|
| 52 |
-
color: #eee;
|
| 53 |
background-color: transparent;
|
| 54 |
}
|
| 55 |
.pretty-table th, .pretty-table td {
|
|
@@ -58,6 +109,7 @@ begin
|
|
| 58 |
}
|
| 59 |
.pretty-table th {
|
| 60 |
background-color: #333;
|
|
|
|
| 61 |
font-weight: 600;
|
| 62 |
}
|
| 63 |
.pretty-table tr:nth-child(even) {
|
|
@@ -78,7 +130,18 @@ begin
|
|
| 78 |
return HTML(table_html)
|
| 79 |
end
|
| 80 |
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
function search_fastfacts(claim::String; model::String = "factchecks", top_k::Int = 5)
|
| 84 |
base_url = "https://stefanjwojcik-misinfo-detection-app.hf.space/fastfactsearch"
|
|
@@ -93,6 +156,26 @@ function search_fastfacts(claim::String; model::String = "factchecks", top_k::In
|
|
| 93 |
error("Failed to fetch results: $(response.status)")
|
| 94 |
end
|
| 95 |
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
nothing
|
| 97 |
end
|
| 98 |
|
|
@@ -135,8 +218,40 @@ md" Enter your search claim in the box below and click 'submit' "
|
|
| 135 |
# βββ‘ 2ec6801c-707c-41ee-9415-e489ee52e3b5
|
| 136 |
md"### RESULTS"
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# βββ‘ 475c45c1-8b0c-41fc-ab7e-dac5e2294e9c
|
| 139 |
-
dataframe_to_html(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
# βββ‘ 00000000-0000-0000-0000-000000000001
|
| 142 |
PLUTO_PROJECT_TOML_CONTENTS = """
|
|
@@ -626,10 +741,16 @@ version = "17.4.0+2"
|
|
| 626 |
# ββ7169689a-9d31-4ac1-bdd7-3771c515e1b3
|
| 627 |
# ββe3d65456-0646-44db-abad-193d778149ec
|
| 628 |
# ββ7ba4e521-51ba-4670-a1cf-396c555fcd5a
|
| 629 |
-
#
|
| 630 |
# ββ785238ae-08b3-4221-85ba-97f4fc7d01e3
|
| 631 |
# ββc2afdd06-9918-4445-889e-1e45a68c0cd9
|
| 632 |
# ββ2ec6801c-707c-41ee-9415-e489ee52e3b5
|
|
|
|
| 633 |
# ββ475c45c1-8b0c-41fc-ab7e-dac5e2294e9c
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 634 |
# ββ00000000-0000-0000-0000-000000000001
|
| 635 |
# ββ00000000-0000-0000-0000-000000000002
|
|
|
|
| 31 |
# βββ‘ 86b197ec-84f7-4fc1-9abf-e4244e853ce7
|
| 32 |
begin
|
| 33 |
|
| 34 |
+
# Take long text to chunks
|
| 35 |
+
"""
    chunk_text(text, chunk_size=280)

Split `text` into consecutive pieces of at most `chunk_size` characters.

Returns a `Vector{String}`; the last piece may be shorter, and an empty `text`
yields an empty vector. Throws an `ArgumentError` when `chunk_size` is smaller
than 1.
"""
function chunk_text(text::AbstractString, chunk_size::Integer=280)
    # Partition by character instead of slicing with `text[i:j]`: string indices
    # are byte offsets, so stepping by `length(text)` (a character count) breaks
    # on multi-byte UTF-8 input (invalid-index errors or a truncated tail).
    return String[join(piece) for piece in Iterators.partition(text, chunk_size)]
end
|
| 43 |
+
|
| 44 |
+
"""
    score_to_background(score)

Map `score` to a CSS `hsl(...)` colour string: 0 gives blue (hue 240), 1 gives
red (hue 0), with the hue interpolated linearly in between.

Scores outside `[0, 1]` are clamped so the hue never leaves the blue-to-red range.
"""
function score_to_background(score)
    bounded = clamp(score, 0, 1)
    hue = round(Int, 240 * (1 - bounded))  # 240 (blue) -> 0 (red)
    return "hsl($hue, 100%, 50%)"
end
|
| 49 |
+
|
| 50 |
+
"""
    text_color_for_bg(score)

Return `"white"` or `"black"`, whichever reads better on the background that a
blue-to-red hue scale (hue `240 * (1 - score)`, full saturation, 50% lightness)
produces for `score`.

The choice is based on the perceived brightness of that background rather than
on the raw score: the dark ends of the scale (blue, red) get white text, the
bright middle (cyan, green, yellow) gets black text. Scores outside `[0, 1]`
are clamped.
"""
function text_color_for_bg(score)
    hue = round(Int, 240 * (1 - clamp(score, 0, 1)))

    # At 100% saturation and 50% lightness the colour is the pure hue, so the
    # RGB components follow directly from the 60-degree sector the hue is in.
    if hue < 60
        r, g, b = 1.0, hue / 60, 0.0
    elseif hue < 120
        r, g, b = (120 - hue) / 60, 1.0, 0.0
    elseif hue < 180
        r, g, b = 0.0, 1.0, (hue - 120) / 60
    else
        r, g, b = 0.0, (240 - hue) / 60, 1.0
    end

    # Weighted RGB sum approximating perceived brightness (0 = dark, 1 = bright).
    brightness = 0.299 * r + 0.587 * g + 0.114 * b
    return brightness < 0.5 ? "white" : "black"
end
|
| 54 |
+
|
| 55 |
+
"""
    score_to_color(score)

Map `score` to a CSS `rgb(r,g,b)` string on a blue -> gray -> red scale.

Scores from 0 up to 0.7 fade from blue (`#0000FF`) to gray (`#888888`); scores
from 0.7 up to 1 fade from gray to red (`#FF0000`). Scores outside `[0, 1]` are
clamped so every channel stays within 0-255.
"""
function score_to_color(score)
    midpoint = 0.7
    s = clamp(score, 0, 1)

    if s <= midpoint
        # Lower segment: blue -> gray.
        t = s / midpoint
        start_rgb, stop_rgb = (0, 0, 255), (136, 136, 136)
    else
        # Upper segment: gray -> red. Normalise by the segment's own width
        # (1 - midpoint) so that a score of 1 lands exactly on red.
        t = (s - midpoint) / (1 - midpoint)
        start_rgb, stop_rgb = (136, 136, 136), (255, 0, 0)
    end

    r, g, b = (round(Int, (1 - t) * lo + t * hi) for (lo, hi) in zip(start_rgb, stop_rgb))
    return "rgb($r,$g,$b)"
end
|
| 71 |
+
|
| 72 |
+
function dataframe_to_html(df::DataFrame)
|
| 73 |
html_rows = String[]
|
| 74 |
+
colnames = names(df)
|
| 75 |
+
|
| 76 |
+
# Header
|
| 77 |
+
push!(html_rows, "<thead><tr>" * join(["<th>$(col)</th>" for col in colnames]) * "</tr></thead>")
|
| 78 |
+
|
| 79 |
# Data rows
|
| 80 |
for row in eachrow(df)
|
| 81 |
+
row_html = "<tr>"
|
| 82 |
+
for col in colnames
|
| 83 |
+
val = row[col]
|
| 84 |
+
if col == :text && :score in colnames
|
| 85 |
+
bg = score_to_background(row[:score])
|
| 86 |
+
fg = text_color_for_bg(row[:score])
|
| 87 |
+
row_html *= "<td style='background-color:$bg; color:$fg;'>$(val)</td>"
|
| 88 |
+
else
|
| 89 |
+
row_html *= "<td>$(val)</td>"
|
| 90 |
+
end
|
| 91 |
+
end
|
| 92 |
+
row_html *= "</tr>"
|
| 93 |
+
push!(html_rows, row_html)
|
| 94 |
end
|
| 95 |
|
| 96 |
+
# Full HTML with style
|
| 97 |
table_html = """
|
| 98 |
<style>
|
| 99 |
table.pretty-table {
|
|
|
|
| 101 |
width: 100%;
|
| 102 |
font-family: system-ui, sans-serif;
|
| 103 |
margin-top: 10px;
|
|
|
|
| 104 |
background-color: transparent;
|
| 105 |
}
|
| 106 |
.pretty-table th, .pretty-table td {
|
|
|
|
| 109 |
}
|
| 110 |
.pretty-table th {
|
| 111 |
background-color: #333;
|
| 112 |
+
color: white;
|
| 113 |
font-weight: 600;
|
| 114 |
}
|
| 115 |
.pretty-table tr:nth-child(even) {
|
|
|
|
| 130 |
return HTML(table_html)
|
| 131 |
end
|
| 132 |
|
| 133 |
+
"""
    scored_text_paragraph(df::DataFrame)

Render the rows of `df` as one HTML paragraph in which each row's `:text` is a
`<span>` coloured by `score_to_color` applied to that row's `:score`.

`df` must have `:score` and `:text` columns. The text is HTML-escaped before it
is embedded, so markup characters in it are shown literally instead of being
interpreted by the browser. Returns an `HTML` object.
"""
function scored_text_paragraph(df::DataFrame)
    # Escape `&` first so the entities produced for `<` and `>` are not re-escaped.
    escape_html(s) = replace(
        replace(replace(string(s), "&" => "&amp;"), "<" => "&lt;"), ">" => "&gt;"
    )

    fragments = String[]
    for row in eachrow(df)
        color = score_to_color(row[:score])
        text = escape_html(row[:text])
        push!(fragments, """<span style="color: $color;">$text</span>""")
    end

    html = "<p style='font-family: system-ui, sans-serif; font-size: 1.1rem;'>" *
           join(fragments, " ") * "</p>"
    return HTML(html)
end
|
| 145 |
|
| 146 |
function search_fastfacts(claim::String; model::String = "factchecks", top_k::Int = 5)
|
| 147 |
base_url = "https://stefanjwojcik-misinfo-detection-app.hf.space/fastfactsearch"
|
|
|
|
| 156 |
error("Failed to fetch results: $(response.status)")
|
| 157 |
end
|
| 158 |
end
|
| 159 |
+
|
| 160 |
+
# Iterated fastfact
|
| 161 |
+
"""
    iteratedfastfact(df::DataFrame)

For every entry of `df`'s `:text` column, call `search_fastfacts` with `top_k=1`
and stack the `:score`, `:policy` and `:text` columns of each result, in row
order, into a single `DataFrame`.
"""
function iteratedfastfact(df::DataFrame)
    wanted = [:score, :policy, :text]

    # One lookup per input text, reduced to the columns of interest.
    best_match(claim) = select(DataFrame(search_fastfacts(claim; top_k=1)), wanted)

    # Left fold starting from an empty frame: each result is appended as it is fetched.
    return foldl(vcat, (best_match(df[i, :text]) for i in 1:nrow(df)); init=DataFrame())
end
|
| 178 |
+
|
| 179 |
nothing
|
| 180 |
end
|
| 181 |
|
|
|
|
| 218 |
# βββ‘ 2ec6801c-707c-41ee-9415-e489ee52e3b5
|
| 219 |
md"### RESULTS"
|
| 220 |
|
| 221 |
+
# βββ‘ eb78b6f9-ead9-4dbc-8a0b-befb141523d0
|
| 222 |
+
begin
|
| 223 |
+
# Query the fact-check search for `claimtext`, asking for `nclaims` matches, and keep
# only the score, policy and text columns. `claimtext` and `nclaims` are defined
# outside the visible source (presumably bound by input widgets; confirm).
df = select(DataFrame(search_fastfacts(claimtext; top_k=nclaims)), [:score, :policy, :text])
|
| 224 |
+
nothing
|
| 225 |
+
end
|
| 226 |
+
|
| 227 |
# βββ‘ 475c45c1-8b0c-41fc-ab7e-dac5e2294e9c
|
| 228 |
+
dataframe_to_html(df)
|
| 229 |
+
|
| 230 |
+
# βββ‘ 3b52bf06-c1cd-441b-aab3-06ffb0d39208
|
| 231 |
+
md"""
|
| 232 |
+
### Fact-checking Long Text
|
| 233 |
+
"""
|
| 234 |
+
|
| 235 |
+
# βββ‘ 7bca1473-95a2-443b-9689-cc40fa51727f
|
| 236 |
+
md"""
|
| 237 |
+
Put in a long piece of text, such as a speech or video transcription.
|
| 238 |
+
"""
|
| 239 |
+
|
| 240 |
+
# βββ‘ f1cea517-2f40-4cc0-a621-6f3bb1056e44
|
| 241 |
+
@bind longtext confirm(TextField((90, 12), default="The election of the president and for vice president of the United States is an indirect election in which citizens of the United States who are registered to vote in one of the fifty U.S. states or in Washington, D.C., cast ballots not directly for those offices, but instead for members of the Electoral College.[note 1] These electors then cast direct votes, known as electoral votes, for president and for vice president. The candidate who receives an absolute majority of electoral votes (at least 270 out of 538, since the Twenty-third Amendment granted voting rights to citizens of D.C.) is then elected to that office. If no candidate receives an absolute majority of the votes for president, the House of Representatives elects the president; likewise if no one receives an absolute majority of the votes for vice president, then the Senate elects the vice president. ...."))
|
| 242 |
+
|
| 243 |
+
# βββ‘ be418613-b934-4a3c-8728-efe76e208db2
|
| 244 |
+
md" #### Fact-checked Text
|
| 245 |
+
Blue = factful;
|
| 246 |
+
Red = questionable
|
| 247 |
+
"
|
| 248 |
+
|
| 249 |
+
# βββ‘ 59013658-1462-4b6e-b490-27664ac9e1a4
|
| 250 |
+
begin
|
| 251 |
+
# Split the long input text into chunks (chunk size 80) and put them in the `text`
# column, one chunk per row, ready to be looked up individually.
iterdf = DataFrame(text = chunk_text(longtext, 80))
|
| 252 |
+
newdf = iteratedfastfact(iterdf)
|
| 253 |
+
scored_text_paragraph(newdf)
|
| 254 |
+
end
|
| 255 |
|
| 256 |
# βββ‘ 00000000-0000-0000-0000-000000000001
|
| 257 |
PLUTO_PROJECT_TOML_CONTENTS = """
|
|
|
|
| 741 |
# ββ7169689a-9d31-4ac1-bdd7-3771c515e1b3
|
| 742 |
# ββe3d65456-0646-44db-abad-193d778149ec
|
| 743 |
# ββ7ba4e521-51ba-4670-a1cf-396c555fcd5a
|
| 744 |
+
# ββ28653105-c9cc-4ee1-9df8-a54644172a4c
|
| 745 |
# ββ785238ae-08b3-4221-85ba-97f4fc7d01e3
|
| 746 |
# ββc2afdd06-9918-4445-889e-1e45a68c0cd9
|
| 747 |
# ββ2ec6801c-707c-41ee-9415-e489ee52e3b5
|
| 748 |
+
# ββeb78b6f9-ead9-4dbc-8a0b-befb141523d0
|
| 749 |
# ββ475c45c1-8b0c-41fc-ab7e-dac5e2294e9c
|
| 750 |
+
# ββ3b52bf06-c1cd-441b-aab3-06ffb0d39208
|
| 751 |
+
# ββ7bca1473-95a2-443b-9689-cc40fa51727f
|
| 752 |
+
# ββf1cea517-2f40-4cc0-a621-6f3bb1056e44
|
| 753 |
+
# ββbe418613-b934-4a3c-8728-efe76e208db2
|
| 754 |
+
# ββ59013658-1462-4b6e-b490-27664ac9e1a4
|
| 755 |
# ββ00000000-0000-0000-0000-000000000001
|
| 756 |
# ββ00000000-0000-0000-0000-000000000002
|
utils.jl
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Utility Functions
|
| 2 |
+
|
| 3 |
+
# Score to colors
|
| 4 |
+
"""
    score_to_color(score)

Map `score` to a CSS `rgb(r,g,b)` string on a blue -> gray -> red scale.

Scores from 0 up to 0.5 fade from blue (`#0000FF`) to gray (`#888888`); scores
from 0.5 up to 1 fade from gray to red (`#FF0000`). Scores outside `[0, 1]` are
clamped so every channel stays within 0-255.
"""
function score_to_color(score)
    midpoint = 0.5
    s = clamp(score, 0, 1)

    if s <= midpoint
        # Lower segment: blue -> gray.
        t = s / midpoint
        start_rgb, stop_rgb = (0, 0, 255), (136, 136, 136)
    else
        # Upper segment: gray -> red.
        t = (s - midpoint) / (1 - midpoint)
        start_rgb, stop_rgb = (136, 136, 136), (255, 0, 0)
    end

    r, g, b = (round(Int, (1 - t) * lo + t * hi) for (lo, hi) in zip(start_rgb, stop_rgb))
    return "rgb($r,$g,$b)"
end
|
| 20 |
+
|
| 21 |
+
# Creates a paragraph with colored text based on scores from a dataframe
|
| 22 |
+
"""
    scored_text_paragraph(df::DataFrame)

Create a paragraph of coloured text from a data frame: each row's `:text` becomes
a `<span>` whose colour is `score_to_color` applied to that row's `:score`.

`df` must have `:score` and `:text` columns. The text is HTML-escaped before it
is embedded, so markup characters in it are shown literally instead of being
interpreted by the browser. Returns an `HTML` object.
"""
function scored_text_paragraph(df::DataFrame)
    # Escape `&` first so the entities produced for `<` and `>` are not re-escaped.
    escape_html(s) = replace(
        replace(replace(string(s), "&" => "&amp;"), "<" => "&lt;"), ">" => "&gt;"
    )

    fragments = String[]
    for row in eachrow(df)
        color = score_to_color(row[:score])
        text = escape_html(row[:text])
        push!(fragments, """<span style="color: $color;">$text</span>""")
    end

    html = "<p style='font-family: system-ui, sans-serif; font-size: 1.1rem;'>" *
           join(fragments, " ") * "</p>"
    return HTML(html)
end
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Small example: four scored words rendered as one coloured paragraph.
df = DataFrame(;
    score=[0.1, 0.3, 0.6, 0.9],
    text=["This", "is", "a", "test."],
)
scored_text_paragraph(df)
|