davda54 committed on
Commit
a0f4c64
·
verified ·
1 Parent(s): f3cd202

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -28
app.py CHANGED
@@ -5,6 +5,101 @@ from datetime import datetime
5
  from typing import Dict, List, Tuple
6
  import hashlib
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  # Dummy dataset - replace with actual HuggingFace dataset loading
9
  DUMMY_DATASET = [
10
  {
@@ -227,42 +322,28 @@ custom_css = """
227
  """
228
 
229
  # Create Gradio interface
230
- with gr.Blocks(theme=gr.themes.Soft(), title="Dataset Annotation Tool", css=custom_css) as app:
231
  gr.Markdown("# Norwegian Fluency Annotation")
232
  with gr.Accordion("▶ Click here to see the full annotation guidelines:", open=False):
233
- gr.Markdown("""
234
- ## Detailed Information
235
-
236
- This content is hidden by default and can be expanded.
237
-
238
- - Point 1
239
- - Point 2
240
- - Point 3
241
-
242
- You can put any Gradio components here, including:
243
- - Markdown
244
- - Code blocks
245
- - Images
246
- - Interactive components
247
- """, padding=True)
248
 
249
  user_state = gr.State("")
250
 
251
  # Login Interface
252
- with gr.Group(visible=True, elem_id="login-group") as login_interface:
253
- gr.Markdown("## Login", padding=True)
254
- user_id_input = gr.Textbox(
255
- label="Enter your unique annotator ID to begin",
256
- placeholder="Annotator ID"
257
- )
258
-
259
- with gr.Row():
260
- login_btn = gr.Button("Login", variant="primary", scale=0.2, min_width=100)
261
- gr.HTML("")
262
- login_status = gr.Markdown("", padding=True)
263
 
264
  # Annotation Interface
265
- with gr.Group(visible=False, elem_id="annotation-group") as annotation_interface:
266
  progress_label = gr.Markdown("")
267
 
268
  # Row 1: Prompt
 
5
  from typing import Dict, List, Tuple
6
  import hashlib
7
 
8
+
9
+ guideline = """
10
+ ## Overview
11
+
12
+ This document provides guidelines for evaluating the fluency of Norwegian responses generated by language models. Annotators will compare pairs of responses (Response A and Response B) and determine which response demonstrates better fluency, or if they are equally fluent. The evaluation focuses exclusively on language quality, naturalness, and grammaticality.
13
+
14
+ ## Key principle
15
+
16
+ **Fluency evaluation is strictly limited to linguistic quality.** Do NOT consider:
17
+ - Factual accuracy or correctness
18
+ - Completeness of information
19
+ - Creativity or originality
20
+ - Formatting or structure (unless it affects readability)
21
+ - Length or conciseness
22
+
23
+ ## Definitions
24
+
25
+ ### What is fluency?
26
+
27
+ Fluency refers to the linguistic quality of text that makes it natural, smooth, and easy to read. A fluent response is:
28
+
29
+ - **Grammatically correct**: Follows standard grammar rules with proper syntax
30
+ - **Natural-sounding**: Reads like something a native speaker would write
31
+ - **Coherent**: Maintains logical flow between sentences and paragraphs
32
+ - **Well-formed**: Uses appropriate vocabulary, punctuation, and sentence structure
33
+ - **Smooth**: Flows naturally without awkward phrasing or jarring transitions
34
+ - **Norwegian**: The models respond to Norwegian prompts, so their responses should always be written in either Norwegian Bokmål or Norwegian Nynorsk
35
+
36
+ ### Fluency issues to look for
37
+
38
+ When evaluating fluency, pay attention to:
39
+
40
+ 1. **Grammar errors**: Subject-verb disagreement, incorrect tense, wrong word forms
41
+ 2. **Awkward phrasing**: Unnatural word order, stilted expressions, robotic language
42
+ 3. **Punctuation problems**: Missing or incorrect punctuation that affects readability
43
+ 4. **Word choice issues**: Inappropriate vocabulary, incorrect word usage, repetitive language
44
+ 5. **Sentence structure problems**: Run-on sentences, fragments, unclear pronoun references
45
+ 6. **Flow disruptions**: Abrupt transitions, disconnected ideas within sentences
46
+ 7. **Spelling errors**: Typos and misspellings that affect readability
47
+ 8. **Translationese**: A common problem with language models is that they base their output on English -- the majority language in their training corpus. This can result in unnatural language patterns that read like literal translations from English, such as: TODO
48
+
49
+ ## Annotation procedure
50
+
51
+ ### Step-by-step process
52
+
53
+ 1. **Read both responses completely** without making immediate judgments
54
+ 2. **Focus solely on language quality** - ignore content accuracy and relevance
55
+ 3. **Identify fluency issues** in each response using the criteria above
56
+ 4. **Compare the severity and frequency** of fluency issues between responses
57
+ 5. **Make your decision** based on overall fluency
58
+
59
+ ### Decision options
60
+
61
+ You must select one of three options:
62
+
63
+ - **A is more fluent**: Response A has better overall language quality than Response B
64
+ - **B is more fluent**: Response B has better overall language quality than Response A
65
+ - **Equal fluency**: Both responses have similar language quality (minor differences that don't clearly favor either response)
66
+
67
+ ### Important guidelines
68
+
69
+ - **Minor differences matter**: Even small improvements in fluency should influence your decision
70
+ - **Consider overall impression**: Multiple minor issues may outweigh a single major issue
71
+ - **Be consistent**: Apply the same standards across all evaluations
72
+ - **When in doubt about equality**: If you cannot decisively determine which is better after careful analysis, select "Equal fluency"
73
+
74
+ ## Examples
75
+
76
+ ### Example 1: Clear fluency difference
77
+
78
+ TODO
79
+
80
+ ### Example 2: Equal fluency
81
+
82
+ TODO
83
+
84
+ ### Example 3: Subtle fluency difference
85
+
86
+ TODO
87
+
88
+ ### Example 4: Content vs. fluency
89
+
90
+ TODO
91
+
92
+ ## Edge cases and special considerations
93
+
94
+ TODO
95
+
96
+ **Technical or specialized language**: Technical terminology and domain-specific language should be considered fluent if used correctly and consistently, even if it might seem less natural to a general audience.
97
+
98
+ **Formatting issues**: Ignore formatting differences (bold, italics, bullet points) unless they directly impact readability or sentence structure.
99
+
100
+ **Code or mathematical expressions**: If responses contain code snippets or mathematical expressions, evaluate only the fluency of the natural language portions.
101
+ """
102
+
103
  # Dummy dataset - replace with actual HuggingFace dataset loading
104
  DUMMY_DATASET = [
105
  {
 
322
  """
323
 
324
  # Create Gradio interface
325
+ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial"]), title="Dataset Annotation Tool", css=custom_css) as app:
326
  gr.Markdown("# Norwegian Fluency Annotation")
327
  with gr.Accordion("▶ Click here to see the full annotation guidelines:", open=False):
328
+ gr.Markdown(guideline, padding=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
330
  user_state = gr.State("")
331
 
332
  # Login Interface
333
+ with gr.Column(visible=True, elem_id="login-group") as login_interface:
334
+ with gr.Group():
335
+ gr.Markdown("## Login", padding=True)
336
+ user_id_input = gr.Textbox(
337
+ label="Enter your unique annotator ID to begin",
338
+ placeholder="Annotator ID"
339
+ )
340
+ with gr.Row():
341
+ login_btn = gr.Button("Login", variant="primary", scale=0.2, min_width=100)
342
+ gr.HTML("")
343
+ login_status = gr.Markdown("", padding=True)
344
 
345
  # Annotation Interface
346
+ with gr.Column(visible=False, elem_id="annotation-group") as annotation_interface:
347
  progress_label = gr.Markdown("")
348
 
349
  # Row 1: Prompt