Spaces:

sanketshinde3001
/

TextConvert

Sleeping

App Files Files Community

sanketshinde3001 committed on Mar 21, 2025

Commit

b04beef

verified ·

1 Parent(s): 26043fc

Create app.py

Browse files

Files changed (1) hide show

app.py +288 -0

app.py ADDED Viewed

	@@ -0,0 +1,288 @@

+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from transformers import pipeline
+import difflib
+import spacy
+import re
+from nltk.sentiment import SentimentIntensityAnalyzer
+import nltk
+from collections import Counter
+import uvicorn
+# Download NLTK resources
+try:
+    nltk.download('vader_lexicon', quiet=True)
+    nltk.download('punkt', quiet=True)
+    nltk.download('stopwords', quiet=True)
+except:
+    print("Could not download NLTK resources. Some features may be limited.")
+app = FastAPI()
+# Configure CORS: requests from any origin, with any method and any header, are accepted.
+# NOTE(review): credentials are enabled together with the wildcard origin —
+# confirm this combination is intended before exposing the API publicly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Allows all origins
+    allow_credentials=True,  # Allows credentialed (cookie / auth header) requests
+    allow_methods=["*"],  # Allows all methods
+    allow_headers=["*"],  # Allows all headers
+)
+# Load NLP models
+try:
+    # Load text humanization model
+    humanize_pipe = pipeline("text2text-generation", model="danibor/flan-t5-base-humanizer")
+    # Load spaCy model
+    nlp = spacy.load("en_core_web_sm")
+    # Initialize sentiment analyzer
+    sentiment_analyzer = SentimentIntensityAnalyzer()
+    print("All NLP models loaded successfully!")
+except Exception as e:
+    print(f"Error loading models: {e}")
+    # Create fallback functions if models fail to load
+    def mock_function(text):
+        return "Model could not be loaded. This is a fallback response."
+# Define request models
+class TextRequest(BaseModel):
+    """Request body accepted by both the /humanize and /analyze endpoints."""
+    text: str  # raw input text to humanize or analyze
+class HumanizeResponse(BaseModel):
+    """Response schema declared for POST /humanize."""
+    original_text: str  # the request text, echoed back
+    humanized_text: str  # text generated by the humanizer pipeline
+    diff: list  # word-level changes as {'operation', 'text'} dicts built by get_diff
+    original_word_count: int  # whitespace-separated word count of original_text
+    humanized_word_count: int  # whitespace-separated word count of humanized_text
+    nlp_analysis: dict  # {'original': ..., 'humanized': ...} built by perform_nlp_analysis
+class AnalyzeResponse(BaseModel):
+    """Response schema declared for POST /analyze."""
+    text: str  # the request text, echoed back
+    word_count: int  # whitespace-separated word count of text
+    sentiment: dict  # result of sentiment_analyzer.polarity_scores
+    entities: dict  # entity label -> list of distinct entity texts (extract_entities)
+    key_phrases: list  # up to 10 noun-chunk texts (extract_key_phrases)
+    readability: dict  # metrics returned by calculate_readability
+    complexity: dict  # metrics returned by analyze_complexity
+@app.post("/humanize", response_model=HumanizeResponse)
+async def humanize_text(request: TextRequest):
+    input_text = request.text
+    try:
+        # Generate humanized text
+        result = humanize_pipe(input_text, max_length=500, do_sample=True)
+        humanized_text = result[0]['generated_text']
+        # Get the differences
+        diff = get_diff(input_text, humanized_text)
+        # Process both texts with NLP
+        nlp_analysis = perform_nlp_analysis(input_text, humanized_text)
+        return {
+            'original_text': input_text,
+            'humanized_text': humanized_text,
+            'diff': diff,
+            'original_word_count': len(input_text.split()),
+            'humanized_word_count': len(humanized_text.split()),
+            'nlp_analysis': nlp_analysis
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing text: {str(e)}")
+def get_diff(text1, text2):
+    """
+    Generate a list of changes between two texts.
+    Returns a list of tuples (operation, text)
+    where operation is '+' for addition, '-' for deletion, or ' ' for unchanged.
+    """
+    d = difflib.Differ()
+    diff = list(d.compare(text1.split(), text2.split()))
+    result = []
+    for item in diff:
+        operation = item[0]
+        if operation in ['+', '-', ' ']:
+            text = item[2:]
+            result.append({'operation': operation, 'text': text})
+    return result
+def perform_nlp_analysis(original_text, humanized_text):
+    """
+    Perform comprehensive NLP analysis on both original and humanized text.
+    """
+    result = {}
+    # Process both texts with spaCy
+    original_doc = nlp(original_text)
+    humanized_doc = nlp(humanized_text)
+    # Sentiment analysis
+    original_sentiment = sentiment_analyzer.polarity_scores(original_text)
+    humanized_sentiment = sentiment_analyzer.polarity_scores(humanized_text)
+    # Extract named entities
+    original_entities = extract_entities(original_doc)
+    humanized_entities = extract_entities(humanized_doc)
+    # Extract key phrases using noun chunks
+    original_phrases = extract_key_phrases(original_doc)
+    humanized_phrases = extract_key_phrases(humanized_doc)
+    # Readability metrics
+    original_readability = calculate_readability(original_text)
+    humanized_readability = calculate_readability(humanized_text)
+    # Complexity metrics
+    original_complexity = analyze_complexity(original_doc)
+    humanized_complexity = analyze_complexity(humanized_doc)
+    # Compile all results
+    result = {
+        'original': {
+            'sentiment': original_sentiment,
+            'entities': original_entities,
+            'key_phrases': original_phrases,
+            'readability': original_readability,
+            'complexity': original_complexity
+        },
+        'humanized': {
+            'sentiment': humanized_sentiment,
+            'entities': humanized_entities,
+            'key_phrases': humanized_phrases,
+            'readability': humanized_readability,
+            'complexity': humanized_complexity
+        }
+    }
+    return result
+def extract_entities(doc):
+    """Extract and categorize named entities from a spaCy document."""
+    entities = {}
+    for ent in doc.ents:
+        if ent.label_ not in entities:
+            entities[ent.label_] = []
+        if ent.text not in entities[ent.label_]:
+            entities[ent.label_].append(ent.text)
+    return entities
+def extract_key_phrases(doc):
+    """Extract key phrases using noun chunks."""
+    return [chunk.text for chunk in doc.noun_chunks][:10]  # Limit to top 10
+def calculate_readability(text):
+    """Calculate basic readability metrics."""
+    # Count sentences
+    sentences = len(list(nltk.sent_tokenize(text)))
+    if sentences == 0:
+        sentences = 1  # Avoid division by zero
+    # Count words
+    words = len(text.split())
+    if words == 0:
+        words = 1  # Avoid division by zero
+    # Average words per sentence
+    avg_words_per_sentence = words / sentences
+    # Count syllables (simplified approach)
+    syllables = count_syllables(text)
+    # Calculate Flesch Reading Ease
+    flesch = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
+    return {
+        'sentence_count': sentences,
+        'word_count': words,
+        'avg_words_per_sentence': round(avg_words_per_sentence, 2),
+        'syllable_count': syllables,
+        'flesch_reading_ease': round(flesch, 2)
+    }
+def count_syllables(text):
+    """Count syllables in text (simplified approach)."""
+    # This is a simplified syllable counter
+    text = text.lower()
+    text = re.sub(r'[^a-zA-Z]', ' ', text)
+    words = text.split()
+    count = 0
+    for word in words:
+        word = word.strip()
+        if not word:
+            continue
+        # Count vowel groups as syllables
+        if word[-1] == 'e':
+            word = word[:-1]
+        vowel_count = len(re.findall(r'[aeiouy]+', word))
+        if vowel_count == 0:
+            vowel_count = 1
+        count += vowel_count
+    return count
+def analyze_complexity(doc):
+    """Analyze text complexity using POS tags and dependency parsing."""
+    # Count POS tags
+    pos_counts = Counter([token.pos_ for token in doc])
+    # Calculate lexical diversity
+    total_tokens = len(doc)
+    unique_tokens = len(set([token.text.lower() for token in doc]))
+    lexical_diversity = unique_tokens / total_tokens if total_tokens > 0 else 0
+    # Count dependency relationship types
+    dep_counts = Counter([token.dep_ for token in doc])
+    return {
+        'pos_distribution': dict(pos_counts),
+        'lexical_diversity': round(lexical_diversity, 4),
+        'dependency_types': dict(dep_counts)
+    }
+@app.post("/analyze", response_model=AnalyzeResponse)
+async def analyze_text(request: TextRequest):
+    """Endpoint to just analyze text without humanizing it."""
+    input_text = request.text
+    try:
+        # Process text with NLP
+        doc = nlp(input_text)
+        # Analyze text
+        sentiment = sentiment_analyzer.polarity_scores(input_text)
+        entities = extract_entities(doc)
+        key_phrases = extract_key_phrases(doc)
+        readability = calculate_readability(input_text)
+        complexity = analyze_complexity(doc)
+        return {
+            'text': input_text,
+            'word_count': len(input_text.split()),
+            'sentiment': sentiment,
+            'entities': entities,
+            'key_phrases': key_phrases,
+            'readability': readability,
+            'complexity': complexity
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error analyzing text: {str(e)}")
+# Add a root endpoint for Hugging Face Spaces health check
+@app.get("/")
+async def root():
+    return {"message": "Text Analysis and Humanization API is running!"}
+# For local development: when this file is executed directly, serve the app with
+# uvicorn on all interfaces, port 7860, with auto-reload enabled.
+if __name__ == "__main__":
+    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)