Frenchizer committed on
Commit
d0e1ffd
·
verified ·
1 Parent(s): a385b05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -23
app.py CHANGED
@@ -3,7 +3,6 @@ from transformers import pipeline
3
  import spacy
4
  from gradio_client import Client
5
  import re
6
- import httpx
7
 
8
  # Initialize models
9
  nlp = spacy.load("en_core_web_sm")
@@ -15,50 +14,52 @@ def preprocess_capitalization(text: str) -> str:
15
  processed_words = []
16
 
17
  for word in words:
 
18
  if re.match(r"^[A-Z]+$", word):
19
  processed_words.append(word) # Leave acronyms unchanged
 
20
  elif re.search(r"[A-Z]", word) and re.search(r"[a-z]", word):
21
  processed_words.append(word[0].upper() + word[1:].lower()) # Correct capitalization
22
  else:
23
- processed_words.append(word)
24
 
25
  return " ".join(processed_words)
26
 
27
  def preprocess_text(text: str):
28
  """Process text and return corrections with position information."""
29
  result = {
30
- "suggestions": []
 
 
 
31
  }
32
 
33
- # Step 1: Apply capitalization preprocessing (Spell type)
34
  capitalized_text = preprocess_capitalization(text)
35
  if capitalized_text != text:
36
- result["suggestions"].append({
37
  "original": text,
38
  "corrected": capitalized_text,
39
  "type": "spell"
40
  })
41
  text = capitalized_text # Update text for further processing
42
 
43
- # Step 2: Transformer spell check (Spell type)
44
- spell_checked = spell_checker(text, max_length=512)[0]['generated_text'].strip()
45
- # Only add as a spell suggestion if it’s a true spelling change (not just punctuation)
46
- if spell_checked != text and spell_checked.rstrip('!.') == text.rstrip('!.'):
47
- # If the difference is only punctuation, skip adding it here
48
- pass
49
- elif spell_checked != text:
50
- result["suggestions"].append({
51
  "original": text,
52
  "corrected": spell_checked,
53
  "type": "spell"
54
  })
55
- text = spell_checked # Update text only for true spell corrections
56
 
57
- # Step 3: Add "other" suggestion (e.g., punctuation)
58
- if not text.endswith("!") and text.strip():
59
- result["suggestions"].append({
60
- "original": text,
61
- "corrected": text + "!",
 
 
62
  "type": "other"
63
  })
64
 
@@ -70,15 +71,14 @@ def preprocess_text(text: str):
70
  return text, result
71
 
72
  def preprocess_and_forward(text: str):
73
- """Process text and forward to translation service with custom timeout."""
74
  original_text, preprocessing_result = preprocess_text(text)
75
 
 
 
76
  try:
77
- client = Client("Frenchizer/space_21", httpx_timeout=httpx.Timeout(60.0))
78
  translation = client.predict(original_text)
79
  return translation, preprocessing_result
80
- except httpx.ReadTimeout:
81
- return "Error: Translation service timed out after 60 seconds. Please try again later.", preprocessing_result
82
  except Exception as e:
83
  return f"Error: {str(e)}", preprocessing_result
84
 
 
3
  import spacy
4
  from gradio_client import Client
5
  import re
 
6
 
7
  # Initialize models
8
  nlp = spacy.load("en_core_web_sm")
 
14
  processed_words = []
15
 
16
  for word in words:
17
+ # Check if the word is an acronym (all uppercase letters)
18
  if re.match(r"^[A-Z]+$", word):
19
  processed_words.append(word) # Leave acronyms unchanged
20
+ # Check if the word has mixed capitalization (e.g., "HEllo")
21
  elif re.search(r"[A-Z]", word) and re.search(r"[a-z]", word):
22
  processed_words.append(word[0].upper() + word[1:].lower()) # Correct capitalization
23
  else:
24
+ processed_words.append(word) # Leave other words unchanged
25
 
26
  return " ".join(processed_words)
27
 
28
  def preprocess_text(text: str):
29
  """Process text and return corrections with position information."""
30
  result = {
31
+ "spell_suggestions": [],
32
+ "other_suggestions": [],
33
+ "entities": [],
34
+ "tags": []
35
  }
36
 
37
+ # Apply capitalization preprocessing
38
  capitalized_text = preprocess_capitalization(text)
39
  if capitalized_text != text:
40
+ result["spell_suggestions"].append({
41
  "original": text,
42
  "corrected": capitalized_text,
43
  "type": "spell"
44
  })
45
  text = capitalized_text # Update text for further processing
46
 
47
+ # Transformer spell check
48
+ spell_checked = spell_checker(text, max_length=512)[0]['generated_text']
49
+ if spell_checked != text:
50
+ result["spell_suggestions"].append({
 
 
 
 
51
  "original": text,
52
  "corrected": spell_checked,
53
  "type": "spell"
54
  })
 
55
 
56
+ # Add example other suggestions (you would replace this with your actual logic)
57
+ for word in text.split():
58
+ if word.endswith('.') or word.endswith('?') or word.endswith('!'):
59
+ continue
60
+ result["other_suggestions"].append({
61
+ "original": word,
62
+ "corrected": word + "!",
63
  "type": "other"
64
  })
65
 
 
71
  return text, result
72
 
73
  def preprocess_and_forward(text: str):
74
+ """Process text and forward to translation service."""
75
  original_text, preprocessing_result = preprocess_text(text)
76
 
77
+ # Forward original text to translation service
78
+ client = Client("Frenchizer/space_21")
79
  try:
 
80
  translation = client.predict(original_text)
81
  return translation, preprocessing_result
 
 
82
  except Exception as e:
83
  return f"Error: {str(e)}", preprocessing_result
84