iamspruce committed on
Commit
f5d6f13
·
1 Parent(s): 3e24d97

updated the api

Browse files
Files changed (4) hide show
  1. app/models.py +67 -11
  2. app/prompts.py +83 -9
  3. app/routers/analyze.py +108 -26
  4. requirements.txt +1 -1
app/models.py CHANGED
@@ -1,39 +1,95 @@
1
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
2
  import torch
3
 
 
4
  device = torch.device("cpu")
5
 
6
- # Grammar model
 
 
7
  grammar_tokenizer = AutoTokenizer.from_pretrained("vennify/t5-base-grammar-correction")
8
  grammar_model = AutoModelForSeq2SeqLM.from_pretrained("vennify/t5-base-grammar-correction").to(device)
9
 
10
- # FLAN-T5 for all prompts
 
 
11
  flan_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
12
  flan_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small").to(device)
13
 
14
- # Translation model
 
15
  trans_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ROMANCE")
16
  trans_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-ROMANCE").to(device)
17
 
18
- def run_grammar_correction(text: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  inputs = grammar_tokenizer(f"fix: {text}", return_tensors="pt").to(device)
 
20
  outputs = grammar_model.generate(**inputs)
 
21
  return grammar_tokenizer.decode(outputs[0], skip_special_tokens=True)
22
 
23
- def run_flan_prompt(prompt: str):
 
 
 
 
 
 
 
 
 
 
24
  inputs = flan_tokenizer(prompt, return_tensors="pt").to(device)
 
25
  outputs = flan_model.generate(**inputs)
 
26
  return flan_tokenizer.decode(outputs[0], skip_special_tokens=True)
27
 
28
- def run_translation(text: str, target_lang: str):
 
 
 
 
 
 
 
 
 
 
 
29
  inputs = trans_tokenizer(f">>{target_lang}<< {text}", return_tensors="pt").to(device)
 
30
  outputs = trans_model.generate(**inputs)
 
31
  return trans_tokenizer.decode(outputs[0], skip_special_tokens=True)
32
 
 
 
 
33
 
34
- # Add this at the bottom of models.py
35
- tone_classifier = pipeline("text-classification", model="bhadresh-savani/bert-base-uncased-emotion", top_k=1)
36
 
37
- def classify_tone(text: str):
38
- result = tone_classifier(text)[0][0]
39
- return result['label']
 
 
 
 
 
1
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
2
  import torch
3
 
4
+ # Set the device for model inference (CPU is used by default)
5
  device = torch.device("cpu")
6
 
7
+ # --- Grammar model ---
8
+ # Uses vennify/t5-base-grammar-correction for grammar correction tasks.
9
+ # This model takes text and returns a grammatically corrected version.
10
  grammar_tokenizer = AutoTokenizer.from_pretrained("vennify/t5-base-grammar-correction")
11
  grammar_model = AutoModelForSeq2SeqLM.from_pretrained("vennify/t5-base-grammar-correction").to(device)
12
 
13
+ # --- FLAN-T5 for all prompts ---
14
+ # Uses google/flan-t5-small for various text generation tasks based on prompts,
15
+ # such as paraphrasing, summarizing, and generating tone suggestions.
16
  flan_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
17
  flan_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small").to(device)
18
 
19
+ # --- Translation model ---
20
+ # Uses Helsinki-NLP/opus-mt-en-ROMANCE for English to Romance language translation.
21
  trans_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ROMANCE")
22
  trans_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-ROMANCE").to(device)
23
 
24
+ # --- Tone classification model ---
25
+ # Uses j-hartmann/emotion-english-distilroberta-base for detecting emotions/tones
26
+ # within text. This provides a more nuanced analysis than simple positive/negative.
27
+ # 'top_k=1' ensures that only the most confident label is returned.
28
+ tone_classifier = pipeline("sentiment-analysis", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
29
+
30
def run_grammar_correction(text: str) -> str:
    """
    Return a grammatically corrected version of *text*.

    Args:
        text (str): The input text to be grammatically corrected.

    Returns:
        str: The corrected text produced by the T5 grammar model.
    """
    # The grammar model expects its task prefix "fix: " before the input.
    encoded = grammar_tokenizer(f"fix: {text}", return_tensors="pt").to(device)
    generated = grammar_model.generate(**encoded)
    # Strip special tokens (<pad>, </s>, ...) when decoding back to text.
    return grammar_tokenizer.decode(generated[0], skip_special_tokens=True)
46
 
47
def run_flan_prompt(prompt: str) -> str:
    """
    Run *prompt* through FLAN-T5 and return its generated response.

    Args:
        prompt (str): The prompt string to be processed by FLAN-T5.

    Returns:
        str: The generated text response.
    """
    encoded = flan_tokenizer(prompt, return_tensors="pt").to(device)
    generated = flan_model.generate(**encoded)
    # Decode the generated token ids, dropping special tokens.
    return flan_tokenizer.decode(generated[0], skip_special_tokens=True)
63
 
64
def run_translation(text: str, target_lang: str) -> str:
    """
    Translate *text* into *target_lang* with the Helsinki-NLP model.

    Args:
        text (str): The input text to be translated.
        target_lang (str): Target language code (e.g. "fr" for French).

    Returns:
        str: The translated text.
    """
    # Opus-MT multi-target models select the output language via a
    # ">>lang<<" token prepended to the source text.
    encoded = trans_tokenizer(f">>{target_lang}<< {text}", return_tensors="pt").to(device)
    generated = trans_model.generate(**encoded)
    return trans_tokenizer.decode(generated[0], skip_special_tokens=True)
81
 
82
def classify_tone(text: str) -> str:
    """
    Return the most likely emotion label for *text*.

    Args:
        text (str): The input text for tone classification.

    Returns:
        str: The detected emotion label (e.g. 'neutral', 'joy', 'sadness').
    """
    # With top_k=1 the pipeline yields one inner list per input, each
    # holding a single {'label': ..., 'score': ...} dict — unpack it.
    (best,) = tone_classifier(text)[0]
    return best['label']
app/prompts.py CHANGED
@@ -1,23 +1,97 @@
1
- def tone_prompt(text, tone):
 
 
 
 
 
 
 
 
 
 
2
  return f"Rewrite the following text in a {tone} tone: {text}"
3
 
4
- def clarity_prompt(text):
 
 
 
 
 
 
 
 
 
5
  return f"Make this clearer: {text}"
6
 
7
- def fluency_prompt(text):
 
 
 
 
 
 
 
 
 
8
  return f"Improve the fluency of this sentence: {text}"
9
 
10
- def paraphrase_prompt(text):
 
 
 
 
 
 
 
 
 
11
  return f"Paraphrase: {text}"
12
 
13
- def summarize_prompt(text):
 
 
 
 
 
 
 
 
 
14
  return f"Summarize: {text}"
15
 
16
- def pronoun_friendly_prompt(text):
17
- return f"Rewrite the text using inclusive and non-offensive pronouns: {text}"
 
 
 
 
 
 
 
 
 
 
18
 
19
- def active_voice_prompt(text):
 
 
 
 
 
 
 
 
 
20
  return f"Detect if this is passive or active voice. If passive, suggest an active voice version: {text}"
21
 
22
- def tone_analysis_prompt(text):
 
 
 
 
 
 
 
 
 
23
  return f"Analyze the tone of the following text and suggest improvements if needed: {text}"
 
1
def tone_prompt(text: str, tone: str) -> str:
    """Build a prompt asking the model to rewrite *text* in *tone*
    (e.g. "formal", "informal", "confident")."""
    return f"Rewrite the following text in a {tone} tone: {text}"
13
 
14
def clarity_prompt(text: str) -> str:
    """Build a prompt asking the model to make *text* clearer."""
    return f"Make this clearer: {text}"
25
 
26
def fluency_prompt(text: str) -> str:
    """Build a prompt asking the model to improve the fluency of *text*."""
    return f"Improve the fluency of this sentence: {text}"
37
 
38
def paraphrase_prompt(text: str) -> str:
    """Build a prompt asking the model to paraphrase *text*."""
    return f"Paraphrase: {text}"
49
 
50
def summarize_prompt(text: str) -> str:
    """Build a prompt asking the model to summarize *text*."""
    return f"Summarize: {text}"
61
 
62
def pronoun_friendly_prompt(text: str) -> str:
    """Build a prompt asking the model to rewrite *text* with inclusive,
    respectful language that avoids gender-specific pronouns."""
    return f"Rewrite the following text using inclusive, respectful language avoiding gender-specific pronouns: {text}"
74
 
75
def active_voice_prompt(text: str) -> str:
    """Build a prompt asking the model to detect passive/active voice in
    *text* and, if passive, suggest an active-voice rewrite."""
    return f"Detect if this is passive or active voice. If passive, suggest an active voice version: {text}"
86
 
87
def tone_analysis_prompt(text: str) -> str:
    """Build a prompt asking the model to analyze the tone of *text* and
    suggest improvements where needed."""
    return f"Analyze the tone of the following text and suggest improvements if needed: {text}"
app/routers/analyze.py CHANGED
@@ -4,50 +4,132 @@ from app import models, prompts
4
  from app.core.security import verify_api_key
5
  import language_tool_python
6
  import spacy
 
7
 
8
  router = APIRouter()
 
 
 
9
  nlp = spacy.load("en_core_web_sm")
 
 
 
10
  tool = language_tool_python.LanguageTool('en-US')
11
 
12
  class AnalyzeInput(BaseModel):
 
 
 
 
13
  text: str
14
 
15
  @router.post("/analyze")
16
  def analyze_text(payload: AnalyzeInput, request: Request = Depends(verify_api_key)):
 
 
 
 
 
 
 
 
 
 
 
17
  text = payload.text
18
 
19
- # 1. Grammar Correction
20
- grammar = models.run_grammar_correction(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # 2. Punctuation Fixes
 
23
  matches = tool.check(text)
24
- punctuation_fixes = [m.message for m in matches if 'PUNCTUATION' in m.ruleId.upper()]
 
 
 
 
 
 
 
 
 
 
25
 
26
- # 3. Sentence Correctness Tips
27
- sentence_issues = [m.message for m in matches if 'PUNCTUATION' not in m.ruleId.upper()]
 
 
 
 
 
 
 
 
 
 
28
 
29
- # 4. Tone Detection
30
- tone_result = models.classify_tone(text)
31
- better_tone_version = models.run_flan_prompt(prompts.tone_prompt(text, "formal"))
32
 
33
- # 5. Active/Passive Voice
34
- doc = nlp(text)
35
- voice = "passive" if any(tok.dep_ == "auxpass" for tok in doc) else "active"
36
- if voice == "passive":
37
- better_voice = models.run_flan_prompt(f"Rewrite this in active voice: {text}")
38
- else:
39
- better_voice = "Already in active voice"
 
 
 
 
 
 
 
40
 
41
- # 6. Inclusive Pronoun Suggestion
42
- inclusive = models.run_flan_prompt(prompts.pronoun_friendly_prompt(text))
 
43
 
 
44
  return {
45
- "grammar": grammar,
46
- "punctuation_fixes": punctuation_fixes,
47
- "sentence_issues": sentence_issues,
48
- "tone": tone_result,
49
- "tone_suggestion": better_tone_version,
50
- "voice": voice,
51
- "voice_suggestion": better_voice,
52
- "inclusive_pronouns": inclusive
 
 
 
 
 
 
 
 
 
 
53
  }
 
4
  from app.core.security import verify_api_key
5
  import language_tool_python
6
  import spacy
7
+ import difflib # Import the difflib module for text comparisons
8
 
9
router = APIRouter()

# spaCy English pipeline; its dependency parse is used below for
# active/passive voice detection.
nlp = spacy.load("en_core_web_sm")

# LanguageTool checker for grammar, punctuation and style (US English).
tool = language_tool_python.LanguageTool('en-US')
18
 
19
class AnalyzeInput(BaseModel):
    """Request body for POST /analyze: a single 'text' field to analyse."""
    # Raw text submitted by the client.
    text: str
25
 
26
def _word_diff(original: str, corrected: str) -> list:
    """
    Describe word-level edits that turn *original* into *corrected*.

    Returns a list of human-readable strings such as
    "'teh' \u2192 'the'", "'very' removed", "'a' added".
    """
    # Split once up front — the previous version re-split both full
    # strings inside the opcode loop, doing O(n) work per opcode.
    src = original.split()
    dst = corrected.split()
    changes = []
    matcher = difflib.SequenceMatcher(None, src, dst)
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == 'replace':
            changes.append(f"'{' '.join(src[i1:i2])}' \u2192 '{' '.join(dst[j1:j2])}'")
        elif opcode == 'delete':
            changes.append(f"'{' '.join(src[i1:i2])}' removed")
        elif opcode == 'insert':
            changes.append(f"'{' '.join(dst[j1:j2])}' added")
        # 'equal' spans produce no change entry.
    return changes

@router.post("/analyze")
def analyze_text(payload: AnalyzeInput, request: Request = Depends(verify_api_key)):
    """
    Analyze the provided text for grammar, punctuation, sentence
    correctness, tone, active/passive voice, and inclusive-pronoun
    suggestions.

    Args:
        payload (AnalyzeInput): Request body containing the text to analyze.
        request (Request): FastAPI Request, injected via verify_api_key
            for API-key authentication.

    Returns:
        dict: Structured analysis results (grammar, punctuation,
        sentence_correctness, tone_analysis, voice, inclusive_pronouns).
    """
    text = payload.text

    # --- 1. Grammar correction with a word-level diff ---
    corrected_grammar = models.run_grammar_correction(text)
    grammar_changes = _word_diff(text, corrected_grammar)

    # --- 2. Punctuation fixes and 3. sentence-correctness feedback ---
    matches = tool.check(text)
    punctuation_issues = []
    sentence_correctness_feedback = []
    for m in matches:
        # ruleId may be None for some matches; guard before .upper() so a
        # single odd match cannot crash the endpoint.
        rule_id = (m.ruleId or "").upper()
        if 'PUNCTUATION' in rule_id:
            punctuation_issues.append(m.message)
        else:
            # Everything else is general sentence-correctness feedback.
            sentence_correctness_feedback.append(m.message)

    # --- 4. Tone detection and suggestion ---
    detected_tone = models.classify_tone(text)
    if detected_tone in ("neutral", "joy"):
        # For these tones, offer a formal rewrite via FLAN-T5.
        tone_suggestion_text = models.run_flan_prompt(prompts.tone_prompt(text, "formal"))
    else:
        tone_suggestion_text = f"The detected tone '{detected_tone}' seems appropriate for general communication."

    # --- 5. Active/passive voice detection and suggestion ---
    doc = nlp(text)
    # Heuristic: a passive-auxiliary dependency ('auxpass') indicates
    # a passive construction.
    if any(token.dep_ == "auxpass" for token in doc):
        voice_detected = "passive"
        voice_suggestion = models.run_flan_prompt(prompts.active_voice_prompt(text))
    else:
        voice_detected = "active"
        voice_suggestion = "None \u2014 active voice is fine here."

    # --- 6. Inclusive pronoun suggestion ---
    inclusive_pronouns_suggestion = models.run_flan_prompt(prompts.pronoun_friendly_prompt(text))

    # --- Final response structure ---
    return {
        "grammar": {
            "corrected": corrected_grammar,
            "changes": grammar_changes
        },
        "punctuation": {
            "issues": punctuation_issues,
            "suggestions": []  # diffs in "grammar.changes" carry the concrete fixes
        },
        "sentence_correctness": sentence_correctness_feedback,
        "tone_analysis": {
            "detected": detected_tone,
            "suggestion": tone_suggestion_text
        },
        "voice": {
            "detected": voice_detected,
            "suggestion": voice_suggestion
        },
        "inclusive_pronouns": inclusive_pronouns_suggestion
    }
requirements.txt CHANGED
@@ -7,4 +7,4 @@ pyspellchecker
7
  spacy
8
  nltk
9
  language-tool-python
10
- scikit-learn
 
7
  spacy
8
  nltk
9
  language-tool-python
10
+ scikit-learn