foi-request-generator

Sleeping

App Files Files

HMC83 committed on Sep 2, 2025

Commit

5077d04

verified ·

1 Parent(s): 9762fa4

Update app.py

Browse files

Ditching validation for Wihtgar

Files changed (1) hide show

app.py +27 -80

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import os
 import random
 import time
 import torch
-import re  # <-- NEW
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import spaces
@@ -25,7 +24,6 @@ except Exception as e:
     tokenizer = None
 # --- Data for the Reels ---
-# A list of authority and keyword combinations.
 FOI_COMBINATIONS = [
     {"authority": "Borders NHS Board", "keywords": "whistleblowing guidance, wrongdoing, public body"},
     {"authority": "Borders NHS Board", "keywords": "ethical support, clinical triage, minutes"},
@@ -176,27 +174,9 @@ FOI_COMBINATIONS = [
     {"authority": "Lancaster City Council", "keywords": "coastal erosion, protection measures, maintenance spending"},
 ]
-# Create lists for the spinning animation from the combinations above
 ALL_AUTHORITIES_FOR_SPIN = list(set([item["authority"] for item in FOI_COMBINATIONS]))
 ALL_KEYWORDS_FOR_SPIN = list(set(kw.strip() for item in FOI_COMBINATIONS for kw in item["keywords"].split(',')))
-# --- Helper: clean model output into a numbered list starting at "1." ---
-def clean_and_validate_output(text: str):
-    """
-    Extract the main numbered list starting at '1.' and strip any closing signature lines.
-    Always returns cleaned text and a boolean flag (True = looks fine).
-    """
-    # Keep everything from the first "1." onward, if present.
-    m = re.search(r'(?m)^\s*1\.\s', text)
-    body = text[m.start():].strip() if m else text.strip()
-    # Remove common signature lines at the end (best-effort).
-    body = re.sub(r'(?im)^\s*(yours.*|kind regards.*|regards.*)$', '', body).strip()
-    # If it doesn't contain at least one numbered point, it's still usable, but we mark as not strictly-valid.
-    is_valid = bool(re.search(r'(?m)^\s*\d+\.\s', body))
-    return body, is_valid
 # --- Helper: wrap content in the FOI letter template ---
 def wrap_in_letter(authority: str, body: str) -> str:
     body = body.strip()
@@ -211,7 +191,6 @@ def wrap_in_letter(authority: str, body: str) -> str:
 # --- Backend Function for Local Inference ---
 @spaces.GPU
 def generate_request_local(authority, kw1, kw2, kw3):
-    """Generates a request using the locally loaded transformer model, with validation and retry logic."""
     if not model or not tokenizer:
         return "Error: Model is not loaded. Please check the Space logs for details."
@@ -219,70 +198,43 @@ def generate_request_local(authority, kw1, kw2, kw3):
     keyword_string = ", ".join(keywords)
     prompt = (
         "You are an expert at writing formal Freedom of Information requests to UK public authorities. "
-        f"Generate ONLY the numbered list of the specific information being requested, starting at '1.' "
-        f"for {authority}, using these keywords: {keyword_string}. "
-        "Do not include greetings or signatures."
     )
-    max_retries = 2
-    for attempt in range(max_retries):
-        try:
-            # Tokenize the input prompt
-            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-            # Set generation parameters
-            generation_params = {
-                    "max_new_tokens": 250,
-                    "do_sample": True,
-                    "temperature": 0.25,
-                    "top_k": 50,
-                    "top_p": 0.95,
-                    "repetition_penalty": 1.1,
-                    "streamer": None,
-                    "eos_token_id": tokenizer.eos_token_id
-            }
-            # Generate text sequences
-            output_sequences = model.generate(**inputs, **generation_params)
-            # Decode the generated text
-            generated_text = tokenizer.decode(
-                output_sequences[0][len(inputs["input_ids"][0]):],
-                skip_special_tokens=True
-            ).strip()
-            # Remove artifact if present
-            if generated_text.startswith('.\n'):
-                generated_text = generated_text[2:]
-            # Clean and validate the output
-            cleaned_text, is_valid = clean_and_validate_output(generated_text)
-            # Wrap in the letter template regardless; validation just influences retry behavior
-            letter = wrap_in_letter(authority, cleaned_text)
-            if is_valid:
-                return letter
-            else:
-                print(f"Attempt {attempt + 1}/{max_retries}: Output lacked clear numbering. Retrying...")
-        except Exception as e:
-            print(f"Error during generation attempt {attempt + 1}/{max_retries}: {e}")
-            if attempt == max_retries - 1:
-                return f"An error occurred during text generation: {e}"
-    # If retries failed, return the best effort letter using the last cleaned text we had
-    return wrap_in_letter(authority, "1. [Unable to format automatically] Please restate the information requested.\n2. [Optional second point]")
 # --- Gradio UI and Spinning Logic ---
 def spin_the_reels():
-    """A generator function that simulates spinning reels and then calls the model."""
-    # 1. Simulate the spinning effect
-    spin_duration = 2.0  # seconds
-    spin_interval = 0.05 # update interval
     start_time = time.time()
     while time.time() - start_time < spin_duration:
-        # Yield random values for each reel to create the spinning illusion
         yield (
             random.choice(ALL_AUTHORITIES_FOR_SPIN),
             random.choice(ALL_KEYWORDS_FOR_SPIN),
@@ -292,22 +244,18 @@ def spin_the_reels():
         )
         time.sleep(spin_interval)
-    # 2. Select the final fixed combination
     final_combination = random.choice(FOI_COMBINATIONS)
     final_authority = final_combination["authority"]
-    # Split, strip, and pad keywords to ensure we always have 3 for the UI
     keywords_list = [k.strip() for k in final_combination["keywords"].split(',')]
-    keywords_list += [''] * (3 - len(keywords_list)) # Pad with empty strings if < 3
-    kw1, kw2, kw3 = keywords_list[:3] # Take the first 3
-    # Display the final reel values and a "Generating..." message
     yield (
         final_authority, kw1, kw2, kw3,
         f"Generating request for {final_authority}...\nPlease wait, this may take a moment."
     )
-    # 3. Call the local model and yield the final result
     generated_request = generate_request_local(final_authority, kw1, kw2, kw3)
     yield (
         final_authority, kw1, kw2, kw3,
@@ -315,7 +263,6 @@ def spin_the_reels():
     )
 # --- CSS for Styling ---
-# Added min-width to reduce UI flickering on text change
 reels_css = """
 #reels-container {
     display: flex;
@@ -328,7 +275,7 @@ reels_css = """
     border-radius: 12px;
     background-color: #fef3c7;
     box-shadow: 0 4px 6px rgba(0,0,0,0.1);
-    min-width: 150px; /* Prevents resizing/flickering during spin */
 }
 #reels-container .gradio-textbox input {
     font-size: 1.25rem !important;

 import random
 import time
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import spaces
     tokenizer = None
 # --- Data for the Reels ---
 FOI_COMBINATIONS = [
     {"authority": "Borders NHS Board", "keywords": "whistleblowing guidance, wrongdoing, public body"},
     {"authority": "Borders NHS Board", "keywords": "ethical support, clinical triage, minutes"},
     {"authority": "Lancaster City Council", "keywords": "coastal erosion, protection measures, maintenance spending"},
 ]
 ALL_AUTHORITIES_FOR_SPIN = list(set([item["authority"] for item in FOI_COMBINATIONS]))
 ALL_KEYWORDS_FOR_SPIN = list(set(kw.strip() for item in FOI_COMBINATIONS for kw in item["keywords"].split(',')))
 # --- Helper: wrap content in the FOI letter template ---
 def wrap_in_letter(authority: str, body: str) -> str:
     body = body.strip()
 # --- Backend Function for Local Inference ---
 @spaces.GPU
 def generate_request_local(authority, kw1, kw2, kw3):
     if not model or not tokenizer:
         return "Error: Model is not loaded. Please check the Space logs for details."
     keyword_string = ", ".join(keywords)
     prompt = (
         "You are an expert at writing formal Freedom of Information requests to UK public authorities. "
+        f"Generate the request text (without greeting or signature) for {authority}, using these keywords: {keyword_string}."
     )
+    try:
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        generation_params = {
+            "max_new_tokens": 250,
+            "do_sample": True,
+            "temperature": 0.25,
+            "top_k": 50,
+            "top_p": 0.95,
+            "repetition_penalty": 1.1,
+            "eos_token_id": tokenizer.eos_token_id
+        }
+        output_sequences = model.generate(**inputs, **generation_params)
+        generated_text = tokenizer.decode(
+            output_sequences[0][len(inputs["input_ids"][0]):],
+            skip_special_tokens=True
+        ).strip()
+        if generated_text.startswith('.\n'):
+            generated_text = generated_text[2:]
+        return wrap_in_letter(authority, generated_text)
+    except Exception as e:
+        return f"An error occurred during text generation: {e}"
 # --- Gradio UI and Spinning Logic ---
 def spin_the_reels():
+    spin_duration = 2.0
+    spin_interval = 0.05
     start_time = time.time()
     while time.time() - start_time < spin_duration:
         yield (
             random.choice(ALL_AUTHORITIES_FOR_SPIN),
             random.choice(ALL_KEYWORDS_FOR_SPIN),
         )
         time.sleep(spin_interval)
     final_combination = random.choice(FOI_COMBINATIONS)
     final_authority = final_combination["authority"]
     keywords_list = [k.strip() for k in final_combination["keywords"].split(',')]
+    keywords_list += [''] * (3 - len(keywords_list))
+    kw1, kw2, kw3 = keywords_list[:3]
     yield (
         final_authority, kw1, kw2, kw3,
         f"Generating request for {final_authority}...\nPlease wait, this may take a moment."
     )
     generated_request = generate_request_local(final_authority, kw1, kw2, kw3)
     yield (
         final_authority, kw1, kw2, kw3,
     )
 # --- CSS for Styling ---
 reels_css = """
 #reels-container {
     display: flex;
     border-radius: 12px;
     background-color: #fef3c7;
     box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+    min-width: 150px;
 }
 #reels-container .gradio-textbox input {
     font-size: 1.25rem !important;