oberbics committed on
Commit
32b13fe
·
verified ·
1 Parent(s): d13be49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -10
app.py CHANGED
@@ -9,15 +9,30 @@ MODEL_ID = "oberbics/newspaper-argument-mining-V1"
9
  SYSTEM_PROMPT = """You are an expert at analyzing German historical texts.
10
 
11
  OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
12
- <argument>Main argument text OR NA</argument>
13
- <claim>Core claim in one sentence OR NA</claim>
14
- <explanation>Why this is an argument OR NA</explanation>
15
- <human_verification_needed>True OR False OR NA</human_verification_needed>
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  RULES:
18
  - ONLY output these 4 XML tags
19
- - If no argument exists, use NA for all fields
20
- - Extract complete argumentative passages, not fragments"""
 
 
 
21
 
22
  print("Loading tokenizer...")
23
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -40,6 +55,7 @@ model = AutoModelForCausalLM.from_pretrained(
40
  )
41
  print("Model loaded successfully!")
42
 
 
43
  def extract_arguments(text, temperature=0.1):
44
  if not text or not text.strip():
45
  return "", "Please enter some text to analyze."
@@ -47,7 +63,7 @@ def extract_arguments(text, temperature=0.1):
47
  prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
48
  {SYSTEM_PROMPT}<|eot_id|>
49
  <|start_header_id|>user<|end_header_id|>
50
- Extract arguments from this German historical text:
51
  {text}<|eot_id|>
52
  <|start_header_id|>assistant<|end_header_id|>"""
53
 
@@ -57,7 +73,7 @@ Extract arguments from this German historical text:
57
  with torch.no_grad():
58
  outputs = model.generate(
59
  **inputs,
60
- max_new_tokens=500,
61
  temperature=temperature,
62
  do_sample=True if temperature > 0.01 else False,
63
  top_p=0.9,
@@ -77,7 +93,7 @@ Extract arguments from this German historical text:
77
  formatted = format_output(response)
78
  return response, formatted
79
 
80
- def format_output(xml_response):
81
  def extract_field(field_name):
82
  pattern = f'<{field_name}>(.*?)</{field_name}>'
83
  match = re.search(pattern, xml_response, re.DOTALL)
@@ -101,7 +117,7 @@ def format_output(xml_response):
101
  else:
102
  return """❌ **No Argument Found**
103
 
104
- The text does not contain an argumentative unit."""
105
 
106
  # Gradio interface
107
  demo = gr.Interface(
 
9
  SYSTEM_PROMPT = """You are an expert at analyzing German historical texts.
10
 
11
  OUTPUT FORMAT - EXACTLY these 4 XML tags and NOTHING else:
12
+ <argument>Main argument text OR "NA"</argument>
13
+ <claim>Core claim in one sentence OR "NA"</claim>
14
+ <explanation>Why this is an argument OR "NA"</explanation>
15
+ <human_verification_needed>True OR False</human_verification_needed>
16
+
17
+ EXAMPLE WITH ARGUMENT:
18
+ <argument>Es sind furchtbare Bilder, die sich dabei entrollen. Unter den Trümmern des einen Hause», so erzählt Luigt Barsint im Corrtcre della sera, findet man die Leichen von Unglück lichen, die in anderen Häusern gewohnt baben und die in der Ber- Wirrung de» schrcck.ichen Augenblickes instinktiv bet Fremden Hülfe und Unterschlupf suchten. Niemand erkennt jetzt diese armen Ein dringlinge, ihre Leichen werden nicht reklamiert, und man trägt sie hinunter an de» Strand, wo sie in langer Reihe einer neben den anderen hingebettet weiden, in denselben Tüchern und Decken, in denen sie tbren Tod gesunden.</argument>
19
+ <claim>The earthquake's chaos led to unidentified victims dying in unfamiliar places.</claim>
20
+ <explanation>Describes how people fled to other houses seeking help during the disaster, died there, and now cannot be identified or claimed by relatives. Shows cause (panic/confusion) and effect (anonymous deaths).</explanation>
21
+ <human_verification_needed>False</human_verification_needed>
22
+
23
+ EXAMPLE WITHOUT ARGUMENT:
24
+ <argument>NA</argument>
25
+ <claim>NA</claim>
26
+ <explanation>NA</explanation>
27
+ <human_verification_needed>FALSE</human_verification_needed>
28
 
29
  RULES:
30
  - ONLY output these 4 XML tags
31
+ - Extract only original text without changes or use NA when you did not find an argument
32
+ - Extract full passages/units of argument (should be more than one sentence)
33
+ - In cases of uncertainty or ambiguity, say human_verification_needed TRUE
34
+ - If no argument exists, use NA for all fields except <human_verification_needed>FALSE or TRUE</human_verification_needed>
35
+ - More than one argumentative unit is possible for one article; each unit has one clear claim and all the XML tags"""
36
 
37
  print("Loading tokenizer...")
38
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
55
  )
56
  print("Model loaded successfully!")
57
 
58
+ @spaces.GPU
59
  def extract_arguments(text, temperature=0.1):
60
  if not text or not text.strip():
61
  return "", "Please enter some text to analyze."
 
63
  prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
64
  {SYSTEM_PROMPT}<|eot_id|>
65
  <|start_header_id|>user<|end_header_id|>
66
+ Extract arguments from historical text. Arguments in historical texts are often implicit (only have a premise and no conclusion). Pay attention to find premises:
67
  {text}<|eot_id|>
68
  <|start_header_id|>assistant<|end_header_id|>"""
69
 
 
73
  with torch.no_grad():
74
  outputs = model.generate(
75
  **inputs,
76
+ max_new_tokens=5000,
77
  temperature=temperature,
78
  do_sample=True if temperature > 0.01 else False,
79
  top_p=0.9,
 
93
  formatted = format_output(response)
94
  return response, formatted
95
 
96
+ '''def format_output(xml_response):
97
  def extract_field(field_name):
98
  pattern = f'<{field_name}>(.*?)</{field_name}>'
99
  match = re.search(pattern, xml_response, re.DOTALL)
 
117
  else:
118
  return """❌ **No Argument Found**
119
 
120
+ The text does not contain an argumentative unit."""'''
121
 
122
  # Gradio interface
123
  demo = gr.Interface(