Commit
·
08080e4
1
Parent(s):
e0c4a8b
config
Browse files- Dockerfile +21 -12
- README.md +0 -19
- api.py +31 -0
- app.py +31 -0
- requirements.txt +9 -3
- scorer.py +379 -0
- src/streamlit_app.py +0 -40
- static/index.html +94 -0
Dockerfile
CHANGED
|
@@ -1,20 +1,29 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
|
|
|
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
| 12 |
-
COPY
|
| 13 |
|
| 14 |
-
|
|
|
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
-
|
|
|
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
| 1 |
+
# Use Python 3.11 (matches your local setup)
FROM python:3.11-slim

# Set the working directory
WORKDIR /app

# 1. Install Java (system dependency for language-tool).
#    Done manually here because packages.txt is ignored in Docker spaces.
#    --no-install-recommends keeps the image slim.
RUN apt-get update && \
    apt-get install -y --no-install-recommends openjdk-17-jdk-headless && \
    rm -rf /var/lib/apt/lists/*

# 2. Install Python libraries FIRST so this expensive layer is cached
#    and only rebuilt when requirements.txt itself changes.
#    (Previously `COPY . .` came first, which invalidated the pip layer
#    on every source-code edit.)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# 3. Copy the application code (edits here no longer re-trigger pip install)
COPY . .

# 4. Create a specific user (Hugging Face requirement for security)
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# 5. Expose port 7860 (Hugging Face specifically listens on this port)
EXPOSE 7860

# 6. Run Streamlit pointing to that specific port
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
README.md
DELETED
|
@@ -1,19 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Speech Transcript Evaluation System
|
| 3 |
-
emoji: 🚀
|
| 4 |
-
colorFrom: red
|
| 5 |
-
colorTo: red
|
| 6 |
-
sdk: docker
|
| 7 |
-
app_port: 8501
|
| 8 |
-
tags:
|
| 9 |
-
- streamlit
|
| 10 |
-
pinned: false
|
| 11 |
-
short_description: Streamlit template space
|
| 12 |
-
---
|
| 13 |
-
|
| 14 |
-
# Welcome to Streamlit!
|
| 15 |
-
|
| 16 |
-
Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
|
| 17 |
-
|
| 18 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 19 |
-
forums](https://discuss.streamlit.io).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI wrapper around IntroductionScorer.

Serves the static front-end at "/" and exposes a JSON scoring endpoint
at POST /api/score.
"""
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from scorer import IntroductionScorer
import uvicorn
import os

app = FastAPI()

# Input Model for the scoring endpoint
class ScoreRequest(BaseModel):
    transcript: str  # raw speech transcript text
    duration: int    # spoken duration in seconds (0 = unknown)

# Mount static folder to serve index.html and any assets
app.mount("/static", StaticFiles(directory="static"), name="static")

@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the single-page front-end.

    Read fresh on every request so edits to index.html show up without
    restarting the server.
    """
    # BUGFIX: specify the encoding explicitly — the default is
    # platform-dependent and the page contains non-ASCII characters.
    with open(os.path.join("static", "index.html"), "r", encoding="utf-8") as f:
        return f.read()

@app.post("/api/score")
async def get_score(request: ScoreRequest):
    """Score a transcript and return the full rubric breakdown as JSON."""
    scorer = IntroductionScorer(request.transcript, request.duration)
    results = scorer.calculate_overall_score()
    return results

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
app.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Streamlit front-end for the introduction scorer.

Collects a transcript + duration, runs IntroductionScorer, and renders
the total score, a per-category breakdown, and the raw JSON payload.
"""
import streamlit as st
import json
from scorer import IntroductionScorer

st.set_page_config(page_title="Intro Scorer", layout="wide")

st.title(" Introduction Scoring System")
st.write("Enter your speech transcript and duration to get a detailed rubric score.")

with st.form("score_form"):
    transcript = st.text_area("Transcript", height=200, placeholder="Hello, my name is...")
    duration = st.number_input("Duration (seconds)", min_value=0, value=0)
    submitted = st.form_submit_button("Analyze Score")

if submitted and transcript:
    with st.spinner("Analyzing... (Loading AI models might take a moment)"):
        scorer = IntroductionScorer(transcript, duration)
        results = scorer.calculate_overall_score()

    st.metric(label="Total Score", value=f"{results['Total Score']} / 100")

    st.subheader("Detailed Breakdown")
    breakdown = results['Breakdown']

    for category, data in breakdown.items():
        with st.expander(f"{category} (Score: {data['score']})"):
            st.write(f"**Feedback:** {data['feedback']}")
            # Normalize the score to 0..1 for the progress bar.
            # The previous expression `data.get('max', 10) if data.get('max') else 15`
            # had a dead default (10 could never be used); this is equivalent.
            # Fall back to 15 (the largest rubric weight) if 'max' is missing/zero.
            denominator = data.get('max') or 15
            st.progress(min(1.0, data['score'] / denominator))

    st.subheader("Raw JSON Data")
    st.json(results)
|
requirements.txt
CHANGED
|
@@ -1,3 +1,9 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
streamlit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
streamlit
|
| 4 |
+
sentence-transformers
|
| 5 |
+
language-tool-python
|
| 6 |
+
vaderSentiment
|
| 7 |
+
textstat
|
| 8 |
+
spacy
|
| 9 |
+
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz
|
scorer.py
ADDED
|
@@ -0,0 +1,379 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import re
import spacy
import numpy as np
import language_tool_python
from sentence_transformers import SentenceTransformer, util, CrossEncoder
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textstat import textstat

# ==========================================
# 1. ENVIRONMENT & MODEL SETUP
# ==========================================

# Ensure Java is accessible for LanguageTool (Backend fallback)
# Adjust this path if your server location is different
# (matches the openjdk-17 package installed by the Dockerfile)
if os.path.exists("/usr/lib/jvm/java-17-openjdk-amd64"):
    os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-17-openjdk-amd64"

# NOTE: everything below runs at import time — importing this module is
# slow and needs network/disk access for model weights on first run.
print("Loading models... this may take a moment.")

# Load Spacy (with auto-download fallback)
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Spacy model not found. Downloading...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Load AI Models
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')        # sentence embeddings for semantic topic checks
grammar_tool = language_tool_python.LanguageTool('en-US')    # requires a local Java runtime (see JAVA_HOME above)
sentiment_analyzer = SentimentIntensityAnalyzer()            # VADER: lexicon-based sentiment, no model download
# CrossEncoder for potentially deeper NLI tasks (loaded for future-proofing/robustness)
# NOTE(review): nli_model is not referenced anywhere in this file — confirm it is still needed.
nli_model = CrossEncoder('cross-encoder/stsb-distilroberta-base')

print("Models loaded successfully.")
|
| 37 |
+
|
| 38 |
+
# ==========================================
|
| 39 |
+
# 2. SCORING RUBRIC CONFIGURATION
|
| 40 |
+
# ==========================================
|
| 41 |
+
|
| 42 |
+
# Scoring rubric: keyword lists, point weights, and thresholds consumed
# by IntroductionScorer. All phrase matching is done case-insensitively.
RUBRIC = {
    # Greeting phrases, tiered by quality.
    "salutation": {
        "normal": ["hi", "hello"],
        "good": ["good morning", "good afternoon", "good evening", "good day", "hello everyone"],
        "excellent": ["excited to introduce", "feeling great", "pleasure to introduce", "greetings"]
    },
    # Content topics and their point weights (must-have = 4 pts, bonus = 2 pts).
    "content": {
        "must_have": {
            "points": 4,
            "topics": ["Name", "Age", "School/Class", "Family", "Hobbies/Interests"]
        },
        "good_to_have": {
            "points": 2,
            "topics": ["Origin/Location", "Ambition/Goal", "Fun Fact/Unique", "Strengths", "Achievements"]
        }
    },
    # Words-per-minute thresholds for pacing.
    "speech_rate": {
        "fast_threshold": 160,
        "ideal_min": 111,
        "ideal_max": 140,
        "slow_threshold": 80
    },
    # Filler words counted against the clarity score.
    "fillers": ["um", "uh", "like", "you know", "actually", "basically", "right", "i mean", "well", "kinda", "sort of", "hmm"]
}
|
| 66 |
+
|
| 67 |
+
# ==========================================
|
| 68 |
+
# 3. MAIN LOGIC CLASS
|
| 69 |
+
# ==========================================
|
| 70 |
+
|
| 71 |
+
class IntroductionScorer:
    """Score a self-introduction transcript against the RUBRIC (total /100).

    Relies on the module-level models (nlp, sbert_model, grammar_tool,
    sentiment_analyzer) and the RUBRIC configuration dictionary.
    """

    def __init__(self, transcript_text, audio_duration_sec=None):
        """
        Args:
            transcript_text: raw transcript string.
            audio_duration_sec: spoken duration in seconds. 0/None means
                "unknown" — pacing is then assumed ideal (see score_speech_rate).
        """
        self.text = transcript_text
        self.doc = nlp(transcript_text)
        # A duration of 0 is deliberately treated the same as "not provided".
        self.provided_duration = float(audio_duration_sec) if audio_duration_sec else 0

        self.duration_min = (self.provided_duration / 60) if self.provided_duration else 0
        self.sentences = [sent.text.strip() for sent in self.doc.sents]
        self.words = [token.text.lower() for token in self.doc if not token.is_punct]
        self.total_words = len(self.words)

    def score_salutation(self):
        """Return (score out of 5, feedback) for the opening greeting."""
        text_lower = self.text.lower()

        for phrase in RUBRIC["salutation"]["excellent"]:
            if phrase in text_lower:
                return 5, f"Excellent salutation used: '{phrase}'"

        for phrase in RUBRIC["salutation"]["good"]:
            if phrase in text_lower:
                return 4, f"Good salutation used: '{phrase}'"

        # BUGFIX: match whole words only. The previous plain substring test
        # made words like "this" or "hiking" count as a "hi" greeting.
        for word in RUBRIC["salutation"]["normal"]:
            if re.search(rf"\b{re.escape(word)}\b", text_lower):
                return 2, "Basic salutation used (Hi/Hello). Try to be more formal."

        return 0, "No salutation found."

    def score_content(self):
        """Return (score capped at 30, feedback) for required and bonus topics.

        Must-have topics (Name/Age/School/Family/Hobbies) are worth 4 points
        each; bonus topics 2 points each. Detection is regex-first, with an
        embedding-similarity fallback for fuzzier topics.
        """
        scores = 0
        feedback = []

        # --- Regex Checks for Specific Facts ---
        regex_name = r"\b(name\s+is|i\s+am|i[\s'’]*m|myself|this\s+is)\s+([A-Z])"
        regex_age = r"\b(\d+|thirteen|fourteen|fifteen|sixteen)\s*(-)?\s*(years|yrs)\b"
        regex_school = r"\b(class|grade|standard|school|college|university|study|student)\b"

        if re.search(regex_name, self.text, re.IGNORECASE):
            scores += 4
            feedback.append("[+] Name")
        else:
            feedback.append("[-] Name")

        if re.search(regex_age, self.text, re.IGNORECASE):
            scores += 4
            feedback.append("[+] Age")
        else:
            feedback.append("[-] Age")

        if re.search(regex_school, self.text, re.IGNORECASE):
            scores += 4
            feedback.append("[+] School")
        else:
            feedback.append("[-] School")

        # --- Robust Semantic Checks (Regex + Embeddings) ---
        def check_topic_robust(regex, anchors, use_ai=True):
            """True if the topic appears, by regex or embedding similarity > 0.35."""
            # 1. Fast Regex Check
            if re.search(regex, self.text, re.IGNORECASE):
                return True

            # 2. Deep Semantic Check
            if use_ai and self.sentences:
                topic_emb = sbert_model.encode(anchors, convert_to_tensor=True)
                text_emb = sbert_model.encode(self.sentences, convert_to_tensor=True)
                # Find max similarity between any sentence and topic anchors
                best_score = float(util.cos_sim(text_emb, topic_emb).max())
                return best_score > 0.35
            return False

        # Family Check
        if check_topic_robust(r"\b(family|parents|mother|father|siblings)\b", ["My family", "I live with"]):
            scores += 4
            feedback.append("[+] Family")
        else:
            feedback.append("[-] Family")

        # Hobbies Check
        if check_topic_robust(r"\b(hobby|hobbies|enjoy|like\s+(to|playing|reading)|pastime)\b", ["My hobby is", "I enjoy"]):
            scores += 4
            feedback.append("[+] Hobbies")
        else:
            feedback.append("[-] Hobbies")

        # --- Bonus Checks (2 points each; "Origin" is regex-only) ---
        bonuses = {
            "Ambition": (r"\b(goal|ambition|dream|want\s+to\s+be)\b", ["I want to become"], True),
            "Strength": (r"\b(strength|good\s+at|confident)\b", ["My strength is"], True),
            "Unique": (r"\b(unique|special|fun\s+fact)\b", ["fun fact"], True),
            "Origin": (r"\b(i\s+am\s+from|i['’]m\s+from|originally\s+from|live\s+in|living\s+in|born\s+in|hometown|native)\b", [], False),
            "Achievements": (r"\b(won|achievement|award)\b", ["I won"], True)
        }

        for topic, (reg, anc, use_ai_flag) in bonuses.items():
            if check_topic_robust(reg, anc, use_ai=use_ai_flag):
                scores += 2
                feedback.append(f"[+] {topic}")

        return min(30, scores), ", ".join(feedback)

    def score_flow(self):
        """Return (score out of 5, feedback) for structural ordering.

        Uses embedding similarity to locate the salutation, the
        self-introduction, and the closing, then checks they appear in
        that order with some "body" content in between.
        """
        anchors = {
            "salutation": ["Hello everyone", "Good morning", "Hi", "Greetings"],
            "intro": ["My name is", "I am", "I'm", "I’m", "Myself", "This is"],
            "closing": ["Thank you", "Thanks", "That is all", "The end"],
            "body": ["family", "mother", "school", "class", "hobby", "playing", "dream", "goal"]
        }

        if not self.sentences:
            return 0, "No text"

        text_emb = sbert_model.encode(self.sentences, convert_to_tensor=True)

        def get_idx(key, thresh=0.25):
            """Index of the sentence most similar to the anchor set, and whether it clears thresh."""
            anc = sbert_model.encode(anchors[key], convert_to_tensor=True)
            sims = util.cos_sim(text_emb, anc).max(dim=1).values
            best_idx = int(sims.argmax())
            best_score = float(sims.max())
            return best_idx, best_score > thresh

        idx_s, has_s = get_idx("salutation", 0.25)
        idx_i, has_i = get_idx("intro", 0.25)
        idx_c, has_c = get_idx("closing", 0.30)

        # Check if there is "meat" between intro and closing
        has_body = False
        if has_i and has_c and idx_c > idx_i:
            if idx_c - idx_i >= 1:
                mid_sents = self.sentences[idx_i+1 : idx_c]
                if mid_sents:
                    mid_emb = sbert_model.encode(mid_sents, convert_to_tensor=True)
                    bod_emb = sbert_model.encode(anchors["body"], convert_to_tensor=True)
                    if util.cos_sim(mid_emb, bod_emb).max() > 0.25:
                        has_body = True

        debug_info = f"(Indices: Sal={idx_s if has_s else 'X'}, Intro={idx_i if has_i else 'X'}, End={idx_c if has_c else 'X'})"

        if has_s and has_c:
            if has_i:
                if idx_s <= idx_i < idx_c:
                    return (5, "Perfect Flow") if has_body else (5, "Good Flow (Short body)")
                if idx_i == idx_c:
                    return 0, f"Disordered: Introduction and Closing are detected in same sentence. {debug_info}"

            elif idx_s < idx_c:
                return (5, "Good Flow") if has_body else (5, "Acceptable Flow")

        return 0, f"Flow disordered. {debug_info}"

    def score_speech_rate(self):
        """Return (score out of 10, feedback) for words-per-minute pacing.

        Thresholds come from RUBRIC["speech_rate"] (previously duplicated
        as hard-coded literals here); outcomes are unchanged.
        """
        if not self.provided_duration:
            return 10, "Duration not provided (Assumed Ideal)"

        wpm = self.total_words / self.duration_min if self.duration_min > 0 else 0
        rate = RUBRIC["speech_rate"]

        if rate["ideal_min"] <= wpm <= rate["ideal_max"]:
            return 10, f"Ideal ({int(wpm)} WPM)"
        if rate["slow_threshold"] + 1 <= wpm <= rate["fast_threshold"]:
            return 6, f"Acceptable ({int(wpm)} WPM)"
        if wpm > rate["fast_threshold"]:
            return 2, f"Too Fast ({int(wpm)} WPM)"
        if wpm < rate["slow_threshold"] + 1:
            return 2, f"Too Slow ({int(wpm)} WPM)"

        return 2, f"Poor Pacing ({int(wpm)} WPM)"

    def _match_context(self, m):
        """Short excerpt of the transcript around a LanguageTool match."""
        off = getattr(m, 'offset', 0)
        ln = getattr(m, 'errorLength', getattr(m, 'length', 5))
        return self.text[off : off + ln + 10].replace('\n', ' ')

    def score_grammar(self):
        """Return (score out of 10, multi-line feedback) from LanguageTool.

        Spelling, punctuation, hyphenation, and style matches are filtered
        out so only "critical" grammar errors count against the score.
        """
        try:
            matches = grammar_tool.check(self.text)
            scoring_errors = []
            ignored_issues = []

            # --- Intelligent Filtering of Errors ---
            for m in matches:
                rid = getattr(m, 'ruleId', '').upper()
                msg = getattr(m, 'message', '').lower()
                replacements = getattr(m, 'replacements', [])

                offset = getattr(m, 'offset', 0)
                length = getattr(m, 'errorLength', getattr(m, 'length', 5))
                error_text = self.text[offset : offset + length]

                is_ignored = False

                # Ignore hyphenation suggestions if only one hyphen is missing
                if replacements:
                    top_rep = replacements[0]
                    if "-" in top_rep and top_rep.replace("-", "") == error_text.replace(" ", ""):
                        is_ignored = True

                # Ignore stylistic choices often flagged by strict grammar tools
                ignore_keywords = [
                    "hyphen", "compound", "joined", "whitespace", "comma", "punctuation",
                    "spelling", "typo", "morfologik", "uppercase", "capitalization",
                    "repetition", "consecutive", "successive", "same word",
                    "style", "wordiness", "sentence start", "rewording", "thesaurus"
                ]

                if any(k in msg or k in rid.lower() for k in ignore_keywords):
                    is_ignored = True

                if is_ignored:
                    ignored_issues.append(m)
                else:
                    scoring_errors.append(m)

            # --- Scoring Calculation ---
            err_count = len(scoring_errors)
            errors_per_100 = (err_count / self.total_words) * 100 if self.total_words > 0 else 0

            # Conservative penalty: 5+ errors per 100 words bottoms out the metric.
            grammar_metric = 1 - min(errors_per_100 / 5, 1)

            if grammar_metric > 0.9:
                s = 10; g = "Flawless"
            elif grammar_metric >= 0.7:
                s = 8; g = "Good"
            elif grammar_metric >= 0.5:
                s = 6; g = "Average"
            elif grammar_metric >= 0.3:
                s = 4; g = "Needs Improvement"
            else:
                s = 2; g = "Poor"

            # --- Feedback Formatting ---
            fb_lines = []
            fb_lines.append(f"{g} (Score: {s}/10)")
            fb_lines.append("NOTE: Spelling, hyphens, punctuation, and style ignored.")

            if scoring_errors:
                fb_lines.append(f"\n[CRITICAL GRAMMAR ERRORS] ({len(scoring_errors)} found):")
                for m in scoring_errors[:3]:  # Limit to top 3
                    fb_lines.append(f" - {m.message} (Context: '...{self._match_context(m)}...')")
            else:
                fb_lines.append("\n[CRITICAL GRAMMAR ERRORS]: None.")

            if ignored_issues:
                fb_lines.append(f"\n[IGNORED ISSUES] ({len(ignored_issues)} found):")
                for m in ignored_issues[:3]:
                    msg = getattr(m, 'message', 'Issue')
                    fb_lines.append(f" - {msg} (Context: '...{self._match_context(m)}...')")

            return s, "\n".join(fb_lines)

        except Exception as e:
            # Grammar checking is best-effort: a LanguageTool/Java failure
            # should not sink the whole evaluation, so fall back to neutral.
            return 5, f"Error during grammar check: {str(e)}"

    def score_vocabulary(self):
        """Return (score out of 10, feedback) based on type-token ratio."""
        distinct_words = len(set(self.words))
        ttr = distinct_words / self.total_words if self.total_words > 0 else 0

        if ttr >= 0.9: return 10, f"Excellent variety (TTR: {ttr:.2f})"
        elif ttr >= 0.7: return 8, f"Good variety (TTR: {ttr:.2f})"
        elif ttr >= 0.5: return 6, f"Average variety (TTR: {ttr:.2f})"
        elif ttr >= 0.3: return 4, f"Repetitive (TTR: {ttr:.2f})"
        else: return 2, f"Very repetitive (TTR: {ttr:.2f})"

    def score_clarity(self):
        """Return (score out of 15, feedback) penalizing filler words.

        Only single-token fillers can match, since self.words is a list of
        individual tokens (multi-word fillers like "you know" never hit).
        """
        filler_count = 0
        for word in self.words:
            if word in RUBRIC["fillers"]:
                filler_count += 1

        filler_rate = (filler_count / self.total_words) * 100 if self.total_words > 0 else 0

        if filler_rate <= 3: return 15, f"Clear speech ({filler_count} fillers)"
        elif filler_rate <= 6: return 12, f"Mostly clear ({filler_count} fillers)"
        elif filler_rate <= 9: return 9, f"Some hesitation ({filler_count} fillers)"
        elif filler_rate <= 12: return 6, f"Hesitant ({filler_count} fillers)"
        else: return 3, f"Distracted by fillers ({filler_count} fillers)"

    def score_engagement(self):
        """Return (score out of 15, feedback) from VADER sentiment + keywords."""
        vs = sentiment_analyzer.polarity_scores(self.text)

        # Normalize compound score (-1 to 1) to (0 to 1)
        prob = (vs['compound'] + 1) / 2

        high_energy_kws = [
            "excited", "thrilled", "passionate", "delighted", "honor",
            "love", "amazing", "wonderful", "fantastic", "energetic",
            "grateful", "confident", "pleasure"
        ]

        has_enthusiasm = any(w in self.text.lower() for w in high_energy_kws)

        # Cap sentiment if it's high but lacks enthusiastic vocabulary
        if prob >= 0.9 and not has_enthusiasm:
            prob = 0.88

        if prob >= 0.9:
            return 15, f"Very Engaging (Sentiment: {prob:.2f})"
        elif prob >= 0.7:
            return 12, f"Positive (Sentiment: {prob:.2f})"
        elif prob >= 0.5:
            return 9, f"Neutral (Sentiment: {prob:.2f})"
        elif prob >= 0.3:
            return 6, f"Slightly Negative (Sentiment: {prob:.2f})"
        else:
            return 3, f"Negative (Sentiment: {prob:.2f})"

    def calculate_overall_score(self):
        """Run every category scorer and return {"Total Score", "Breakdown"}.

        Category maxima sum to 100: 5 + 30 + 5 + 10 + 10 + 10 + 15 + 15.
        """
        s_salutation, f_salutation = self.score_salutation()
        s_content, f_content = self.score_content()
        s_flow, f_flow = self.score_flow()
        s_rate, f_rate = self.score_speech_rate()
        s_grammar, f_grammar = self.score_grammar()
        s_vocab, f_vocab = self.score_vocabulary()
        s_clarity, f_clarity = self.score_clarity()
        s_engage, f_engage = self.score_engagement()

        total_score = (
            s_salutation + s_content + s_flow + s_rate +
            s_grammar + s_vocab + s_clarity + s_engage
        )

        return {
            "Total Score": total_score,
            "Breakdown": {
                "Salutation": {"score": s_salutation, "max": 5, "feedback": f_salutation},
                "Content & Structure": {"score": s_content, "max": 30, "feedback": f_content},
                "Flow": {"score": s_flow, "max": 5, "feedback": f_flow},
                "Speech Rate": {"score": s_rate, "max": 10, "feedback": f_rate},
                "Grammar": {"score": s_grammar, "max": 10, "feedback": f_grammar},
                "Vocabulary": {"score": s_vocab, "max": 10, "feedback": f_vocab},
                "Clarity (Fillers)": {"score": s_clarity, "max": 15, "feedback": f_clarity},
                "Engagement": {"score": s_engage, "max": 15, "feedback": f_engage},
            }
        }
|
src/streamlit_app.py
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
import altair as alt
|
| 2 |
-
import numpy as np
|
| 3 |
-
import pandas as pd
|
| 4 |
-
import streamlit as st
|
| 5 |
-
|
| 6 |
-
"""
|
| 7 |
-
# Welcome to Streamlit!
|
| 8 |
-
|
| 9 |
-
Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
|
| 10 |
-
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
| 11 |
-
forums](https://discuss.streamlit.io).
|
| 12 |
-
|
| 13 |
-
In the meantime, below is an example of what you can do with just a few lines of code:
|
| 14 |
-
"""
|
| 15 |
-
|
| 16 |
-
num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
|
| 17 |
-
num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
|
| 18 |
-
|
| 19 |
-
indices = np.linspace(0, 1, num_points)
|
| 20 |
-
theta = 2 * np.pi * num_turns * indices
|
| 21 |
-
radius = indices
|
| 22 |
-
|
| 23 |
-
x = radius * np.cos(theta)
|
| 24 |
-
y = radius * np.sin(theta)
|
| 25 |
-
|
| 26 |
-
df = pd.DataFrame({
|
| 27 |
-
"x": x,
|
| 28 |
-
"y": y,
|
| 29 |
-
"idx": indices,
|
| 30 |
-
"rand": np.random.randn(num_points),
|
| 31 |
-
})
|
| 32 |
-
|
| 33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
| 34 |
-
.mark_point(filled=True)
|
| 35 |
-
.encode(
|
| 36 |
-
x=alt.X("x", axis=None),
|
| 37 |
-
y=alt.Y("y", axis=None),
|
| 38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
| 39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
| 40 |
-
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/index.html
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Local Intro Scorer</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
    <style>
        body { background-color: #f8f9fa; padding-top: 50px; }
        .card-score { font-size: 2rem; font-weight: bold; color: #0d6efd; }
    </style>
</head>
<body>
    <div class="container">
        <h1 class="text-center mb-4">Introduction Scoring System (Local)</h1>

        <div class="row">
            <!-- Left column: input form -->
            <div class="col-md-6">
                <div class="card p-4">
                    <div class="mb-3">
                        <label for="transcript" class="form-label">Transcript</label>
                        <textarea class="form-control" id="transcript" rows="10" placeholder="Paste text here..."></textarea>
                    </div>
                    <div class="mb-3">
                        <label for="duration" class="form-label">Duration (seconds)</label>
                        <input type="number" class="form-control" id="duration" value="60">
                    </div>
                    <button onclick="analyze()" class="btn btn-primary w-100" id="btnAnalyze">Analyze</button>
                </div>
            </div>

            <!-- Right column: results -->
            <div class="col-md-6">
                <div class="card p-4 h-100">
                    <h3>Results</h3>
                    <div id="loading" class="text-muted" style="display:none;">Processing...</div>
                    <div id="results-area" style="display:none;">
                        <div class="text-center mb-3">
                            <div>Total Score</div>
                            <div class="card-score" id="total-score">0/100</div>
                        </div>
                        <ul class="list-group" id="breakdown-list">
                        </ul>
                    </div>
                </div>
            </div>
        </div>
    </div>

    <script>
    // Send the transcript to POST /api/score and render the rubric breakdown.
    async function analyze() {
        const transcript = document.getElementById('transcript').value;
        const duration = document.getElementById('duration').value;
        const btn = document.getElementById('btnAnalyze');
        const loading = document.getElementById('loading');
        const resArea = document.getElementById('results-area');

        if(!transcript) return alert("Please enter text");

        btn.disabled = true;
        loading.style.display = 'block';
        resArea.style.display = 'none';

        try {
            const response = await fetch('/api/score', {
                method: 'POST',
                headers: {'Content-Type': 'application/json'},
                // BUGFIX: explicit radix on parseInt.
                body: JSON.stringify({ transcript: transcript, duration: parseInt(duration, 10) })
            });

            // BUGFIX: surface HTTP errors (e.g. 422 validation failures)
            // instead of trying to render an error payload as a score.
            if (!response.ok) throw new Error("Server returned HTTP " + response.status);

            const data = await response.json();

            document.getElementById('total-score').innerText = data['Total Score'] + "/100";
            const list = document.getElementById('breakdown-list');
            list.innerHTML = "";

            for (const [key, val] of Object.entries(data['Breakdown'])) {
                const li = document.createElement('li');
                li.className = "list-group-item";
                li.innerHTML = `<strong>${key}</strong>: ${val.score} pts <br><small class='text-muted'>${val.feedback}</small>`;
                list.appendChild(li);
            }

            resArea.style.display = 'block';
        } catch (error) {
            alert("Error connecting to backend");
            console.error(error);
        } finally {
            btn.disabled = false;
            loading.style.display = 'none';
        }
    }
    </script>
</body>
</html>
|