ojaskittur committed on
Commit
08080e4
·
1 Parent(s): e0c4a8b
Files changed (8) hide show
  1. Dockerfile +21 -12
  2. README.md +0 -19
  3. api.py +31 -0
  4. app.py +31 -0
  5. requirements.txt +9 -3
  6. scorer.py +379 -0
  7. src/streamlit_app.py +0 -40
  8. static/index.html +94 -0
Dockerfile CHANGED
@@ -1,20 +1,29 @@
1
- FROM python:3.13.5-slim
 
2
 
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
 
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
14
- RUN pip3 install -r requirements.txt
 
15
 
16
- EXPOSE 8501
 
 
 
 
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
# Use Python 3.11 (matches the local development setup)
FROM python:3.11-slim

# Set the working directory
WORKDIR /app

# 1. Install Java (system dependency for language-tool-python).
# Done manually here because packages.txt is ignored in Docker Spaces.
RUN apt-get update && \
    apt-get install -y --no-install-recommends openjdk-17-jdk-headless && \
    rm -rf /var/lib/apt/lists/*

# 2. Install Python libraries BEFORE copying the source tree, so this
# expensive layer is cached unless requirements.txt itself changes.
# (Previously `COPY . .` came first, which invalidated the pip layer on
# every source edit.)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# 3. Copy the rest of the application files into the container.
COPY . .

# 4. Create a specific non-root user (Hugging Face requirement for security).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# 5. Expose port 7860 (Hugging Face specifically listens on this port).
EXPOSE 7860

# 6. Run Streamlit pointing to that specific port.
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md DELETED
@@ -1,19 +0,0 @@
1
- ---
2
- title: Speech Transcript Evaluation System
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Streamlit template space
12
- ---
13
-
14
- # Welcome to Streamlit!
15
-
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
-
18
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from scorer import IntroductionScorer
import uvicorn
import os

app = FastAPI()


# Input model: the raw transcript text plus the spoken duration in seconds.
class ScoreRequest(BaseModel):
    transcript: str
    duration: int


# Mount static folder to serve index.html and any other assets.
app.mount("/static", StaticFiles(directory="static"), name="static")


@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the single-page UI from static/index.html."""
    # Explicit encoding: the container's locale default may not be UTF-8,
    # and the page contains non-ASCII characters.
    with open(os.path.join("static", "index.html"), "r", encoding="utf-8") as f:
        return f.read()


@app.post("/api/score")
async def get_score(request: ScoreRequest):
    """Score a transcript and return the rubric breakdown as JSON."""
    scorer = IntroductionScorer(request.transcript, request.duration)
    results = scorer.calculate_overall_score()
    return results


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import json
from scorer import IntroductionScorer

st.set_page_config(page_title="Intro Scorer", layout="wide")

st.title(" Introduction Scoring System")
st.write("Enter your speech transcript and duration to get a detailed rubric score.")

# Collect both inputs in a single form so scoring only runs on submit.
with st.form("score_form"):
    transcript = st.text_area("Transcript", height=200, placeholder="Hello, my name is...")
    duration = st.number_input("Duration (seconds)", min_value=0, value=0)
    submitted = st.form_submit_button("Analyze Score")

if submitted and transcript:
    # Model loading happens lazily inside the scorer module, so the first
    # run can be slow — show a spinner while it works.
    with st.spinner("Analyzing... (Loading AI models might take a moment)"):
        results = IntroductionScorer(transcript, duration).calculate_overall_score()

    st.metric(label="Total Score", value=f"{results['Total Score']} / 100")

    st.subheader("Detailed Breakdown")

    for category, details in results['Breakdown'].items():
        with st.expander(f"{category} (Score: {details['score']})"):
            st.write(f"**Feedback:** {details['feedback']}")
            # Normalize the bar by the category maximum; fall back to 15
            # (the largest rubric weight) when no max is present.
            st.progress(details['score'] / (details.get('max') or 15))

    st.subheader("Raw JSON Data")
    st.json(results)
requirements.txt CHANGED
@@ -1,3 +1,9 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ streamlit
4
+ sentence-transformers
5
+ language-tool-python
6
+ vaderSentiment
7
+ textstat
8
+ spacy
9
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz
scorer.py ADDED
@@ -0,0 +1,379 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import re
import spacy
import numpy as np
import language_tool_python
from sentence_transformers import SentenceTransformer, util, CrossEncoder
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textstat import textstat

# ==========================================
# 1. ENVIRONMENT & MODEL SETUP
# ==========================================

# LanguageTool needs a JVM. Point JAVA_HOME at the Debian OpenJDK 17
# location when present (this is where the Docker image installs it);
# adjust if the server layout differs.
if os.path.exists("/usr/lib/jvm/java-17-openjdk-amd64"):
    os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-17-openjdk-amd64"

print("Loading models... this may take a moment.")

# spaCy pipeline — download the small English model on first run if absent.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Spacy model not found. Downloading...")
    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

# Heavyweight model singletons shared by every IntroductionScorer instance.
sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
grammar_tool = language_tool_python.LanguageTool('en-US')
sentiment_analyzer = SentimentIntensityAnalyzer()
# NOTE(review): loaded but not referenced by the scorer yet — kept for
# planned deeper NLI checks.
nli_model = CrossEncoder('cross-encoder/stsb-distilroberta-base')

print("Models loaded successfully.")

# ==========================================
# 2. SCORING RUBRIC CONFIGURATION
# ==========================================

RUBRIC = {
    # Greeting phrases, from minimal to most polished.
    "salutation": {
        "normal": ["hi", "hello"],
        "good": ["good morning", "good afternoon", "good evening", "good day", "hello everyone"],
        "excellent": ["excited to introduce", "feeling great", "pleasure to introduce", "greetings"]
    },
    # Topic coverage: essentials vs. nice-to-have bonus topics.
    "content": {
        "must_have": {
            "points": 4,
            "topics": ["Name", "Age", "School/Class", "Family", "Hobbies/Interests"]
        },
        "good_to_have": {
            "points": 2,
            "topics": ["Origin/Location", "Ambition/Goal", "Fun Fact/Unique", "Strengths", "Achievements"]
        }
    },
    # Words-per-minute bands for pacing.
    "speech_rate": {
        "fast_threshold": 160,
        "ideal_min": 111,
        "ideal_max": 140,
        "slow_threshold": 80
    },
    # Hesitation / filler expressions (single words and multi-word phrases).
    "fillers": ["um", "uh", "like", "you know", "actually", "basically", "right", "i mean", "well", "kinda", "sort of", "hmm"]
}

# ==========================================
# 3. MAIN LOGIC CLASS
# ==========================================
70
+
71
class IntroductionScorer:
    """Score a self-introduction transcript against a 100-point rubric.

    Relies on the module-level singletons ``nlp`` (spaCy), ``sbert_model``
    (sentence embeddings), ``grammar_tool`` (LanguageTool),
    ``sentiment_analyzer`` (VADER) and the ``RUBRIC`` configuration.
    """

    def __init__(self, transcript_text, audio_duration_sec=None):
        """Parse the transcript once and cache sentence/word views.

        Args:
            transcript_text: Raw speech transcript.
            audio_duration_sec: Spoken duration in seconds. ``None``/0 means
                unknown and makes the speech-rate check return its default.
        """
        self.text = transcript_text
        self.doc = nlp(transcript_text)
        self.provided_duration = float(audio_duration_sec) if audio_duration_sec else 0

        self.duration_min = (self.provided_duration / 60) if self.provided_duration else 0
        self.sentences = [sent.text.strip() for sent in self.doc.sents]
        self.words = [token.text.lower() for token in self.doc if not token.is_punct]
        self.total_words = len(self.words)

    def score_salutation(self):
        """Score the greeting (max 5) by matching rubric phrases as whole words.

        FIX: matching now uses word boundaries. The previous plain substring
        test made "hi" match inside unrelated words such as "this" or
        "white", awarding salutation points for no greeting at all.
        """
        text_lower = self.text.lower()

        def has_phrase(phrase):
            # \b anchors prevent substring hits inside other words.
            return re.search(r"\b" + re.escape(phrase) + r"\b", text_lower) is not None

        for phrase in RUBRIC["salutation"]["excellent"]:
            if has_phrase(phrase):
                return 5, f"Excellent salutation used: '{phrase}'"

        for phrase in RUBRIC["salutation"]["good"]:
            if has_phrase(phrase):
                return 4, f"Good salutation used: '{phrase}'"

        for word in RUBRIC["salutation"]["normal"]:
            if has_phrase(word):
                return 2, "Basic salutation used (Hi/Hello). Try to be more formal."

        return 0, "No salutation found."

    def score_content(self):
        """Score topic coverage (max 30): 4 pts per must-have topic,
        2 pts per bonus topic, capped at 30. Returns (score, feedback)."""
        scores = 0
        feedback = []

        # --- Regex checks for specific facts ---
        # NOTE(review): the [A-Z] capture in regex_name is neutralized by
        # re.IGNORECASE below, so it matches any following letter — confirm
        # whether the capitalized-name intent should be enforced.
        regex_name = r"\b(name\s+is|i\s+am|i[\s'’]*m|myself|this\s+is)\s+([A-Z])"
        regex_age = r"\b(\d+|thirteen|fourteen|fifteen|sixteen)\s*(-)?\s*(years|yrs)\b"
        regex_school = r"\b(class|grade|standard|school|college|university|study|student)\b"

        if re.search(regex_name, self.text, re.IGNORECASE):
            scores += 4
            feedback.append("[+] Name")
        else:
            feedback.append("[-] Name")

        if re.search(regex_age, self.text, re.IGNORECASE):
            scores += 4
            feedback.append("[+] Age")
        else:
            feedback.append("[-] Age")

        if re.search(regex_school, self.text, re.IGNORECASE):
            scores += 4
            feedback.append("[+] School")
        else:
            feedback.append("[-] School")

        # --- Robust semantic checks (regex first, embeddings as fallback) ---
        def check_topic_robust(regex, anchors, use_ai=True):
            # 1. Fast regex check.
            if re.search(regex, self.text, re.IGNORECASE):
                return True

            # 2. Deep semantic check: max cosine similarity between any
            #    transcript sentence and any anchor phrase.
            if use_ai and self.sentences:
                topic_emb = sbert_model.encode(anchors, convert_to_tensor=True)
                text_emb = sbert_model.encode(self.sentences, convert_to_tensor=True)
                best_score = float(util.cos_sim(text_emb, topic_emb).max())
                return best_score > 0.35
            return False

        # Family check.
        if check_topic_robust(r"\b(family|parents|mother|father|siblings)\b", ["My family", "I live with"]):
            scores += 4
            feedback.append("[+] Family")
        else:
            feedback.append("[-] Family")

        # Hobbies check.
        if check_topic_robust(r"\b(hobby|hobbies|enjoy|like\s+(to|playing|reading)|pastime)\b", ["My hobby is", "I enjoy"]):
            scores += 4
            feedback.append("[+] Hobbies")
        else:
            feedback.append("[-] Hobbies")

        # --- Bonus checks (2 pts each, no negative feedback when absent) ---
        bonuses = {
            "Ambition": (r"\b(goal|ambition|dream|want\s+to\s+be)\b", ["I want to become"], True),
            "Strength": (r"\b(strength|good\s+at|confident)\b", ["My strength is"], True),
            "Unique": (r"\b(unique|special|fun\s+fact)\b", ["fun fact"], True),
            "Origin": (r"\b(i\s+am\s+from|i['’]m\s+from|originally\s+from|live\s+in|living\s+in|born\s+in|hometown|native)\b", [], False),
            "Achievements": (r"\b(won|achievement|award)\b", ["I won"], True)
        }

        for topic, (reg, anc, use_ai_flag) in bonuses.items():
            if check_topic_robust(reg, anc, use_ai=use_ai_flag):
                scores += 2
                feedback.append(f"[+] {topic}")

        return min(30, scores), ", ".join(feedback)

    def score_flow(self):
        """Score structure (max 5): salutation -> intro -> body -> closing,
        located via embedding similarity against anchor phrases."""
        anchors = {
            "salutation": ["Hello everyone", "Good morning", "Hi", "Greetings"],
            "intro": ["My name is", "I am", "I'm", "I’m", "Myself", "This is"],
            "closing": ["Thank you", "Thanks", "That is all", "The end"],
            "body": ["family", "mother", "school", "class", "hobby", "playing", "dream", "goal"]
        }

        if not self.sentences:
            return 0, "No text"

        text_emb = sbert_model.encode(self.sentences, convert_to_tensor=True)

        def get_idx(key, thresh=0.25):
            # Index of the sentence most similar to any anchor for `key`,
            # plus whether that similarity clears the threshold.
            anc = sbert_model.encode(anchors[key], convert_to_tensor=True)
            sims = util.cos_sim(text_emb, anc).max(dim=1).values
            best_idx = int(sims.argmax())
            best_score = float(sims.max())
            return best_idx, best_score > thresh

        idx_s, has_s = get_idx("salutation", 0.25)
        idx_i, has_i = get_idx("intro", 0.25)
        idx_c, has_c = get_idx("closing", 0.30)

        # Check if there is "meat" between the intro and the closing.
        has_body = False
        if has_i and has_c and idx_c > idx_i:
            if idx_c - idx_i >= 1:
                mid_sents = self.sentences[idx_i + 1: idx_c]
                if mid_sents:
                    mid_emb = sbert_model.encode(mid_sents, convert_to_tensor=True)
                    bod_emb = sbert_model.encode(anchors["body"], convert_to_tensor=True)
                    if util.cos_sim(mid_emb, bod_emb).max() > 0.25:
                        has_body = True

        debug_info = f"(Indices: Sal={idx_s if has_s else 'X'}, Intro={idx_i if has_i else 'X'}, End={idx_c if has_c else 'X'})"

        if has_s and has_c:
            if has_i:
                if idx_s <= idx_i < idx_c:
                    return (5, "Perfect Flow") if has_body else (5, "Good Flow (Short body)")
                if idx_i == idx_c:
                    return 0, f"Disordered: Introduction and Closing are detected in same sentence. {debug_info}"
            elif idx_s < idx_c:
                return (5, "Good Flow") if has_body else (5, "Acceptable Flow")

        return 0, f"Flow disordered. {debug_info}"

    def score_speech_rate(self):
        """Score pacing (max 10) in words per minute.

        FIX: thresholds are now read from RUBRIC["speech_rate"] instead of
        duplicated as magic numbers (111/140/81/160); numeric behavior is
        unchanged (81 == slow_threshold + 1, 160 == fast_threshold).
        """
        if not self.provided_duration:
            return 10, "Duration not provided (Assumed Ideal)"

        wpm = self.total_words / self.duration_min if self.duration_min > 0 else 0
        cfg = RUBRIC["speech_rate"]

        if cfg["ideal_min"] <= wpm <= cfg["ideal_max"]:
            return 10, f"Ideal ({int(wpm)} WPM)"
        if cfg["slow_threshold"] + 1 <= wpm <= cfg["fast_threshold"]:
            return 6, f"Acceptable ({int(wpm)} WPM)"
        if wpm > cfg["ideal_max"]:
            return 2, f"Too Fast ({int(wpm)} WPM)"
        if wpm < cfg["slow_threshold"] + 1:
            return 2, f"Too Slow ({int(wpm)} WPM)"

        return 2, f"Poor Pacing ({int(wpm)} WPM)"

    def score_grammar(self):
        """Score grammar (max 10) via LanguageTool, ignoring stylistic and
        spelling-only findings; returns (score, multi-line feedback)."""
        try:
            matches = grammar_tool.check(self.text)
            scoring_errors = []
            ignored_issues = []

            # --- Intelligent filtering of errors ---
            for m in matches:
                rid = getattr(m, 'ruleId', '').upper()
                msg = getattr(m, 'message', '').lower()
                replacements = getattr(m, 'replacements', [])

                offset = getattr(m, 'offset', 0)
                length = getattr(m, 'errorLength', getattr(m, 'length', 5))
                error_text = self.text[offset: offset + length]

                is_ignored = False

                # Ignore hyphenation suggestions if only a hyphen is missing.
                if replacements:
                    top_rep = replacements[0]
                    if "-" in top_rep and top_rep.replace("-", "") == error_text.replace(" ", ""):
                        is_ignored = True

                # Ignore stylistic choices often flagged by strict grammar tools.
                ignore_keywords = [
                    "hyphen", "compound", "joined", "whitespace", "comma", "punctuation",
                    "spelling", "typo", "morfologik", "uppercase", "capitalization",
                    "repetition", "consecutive", "successive", "same word",
                    "style", "wordiness", "sentence start", "rewording", "thesaurus"
                ]

                if any(k in msg or k in rid.lower() for k in ignore_keywords):
                    is_ignored = True

                if is_ignored:
                    ignored_issues.append(m)
                else:
                    scoring_errors.append(m)

            # --- Scoring calculation: errors per 100 words, capped at 5 ---
            err_count = len(scoring_errors)
            errors_per_100 = (err_count / self.total_words) * 100 if self.total_words > 0 else 0

            # Conservative penalty.
            grammar_metric = 1 - min(errors_per_100 / 5, 1)

            if grammar_metric > 0.9:
                s = 10; g = "Flawless"
            elif grammar_metric >= 0.7:
                s = 8; g = "Good"
            elif grammar_metric >= 0.5:
                s = 6; g = "Average"
            elif grammar_metric >= 0.3:
                s = 4; g = "Needs Improvement"
            else:
                s = 2; g = "Poor"

            # --- Feedback formatting ---
            fb_lines = []
            fb_lines.append(f"{g} (Score: {s}/10)")
            fb_lines.append("NOTE: Spelling, hyphens, punctuation, and style ignored.")

            if scoring_errors:
                fb_lines.append(f"\n[CRITICAL GRAMMAR ERRORS] ({len(scoring_errors)} found):")
                for m in scoring_errors[:3]:  # Limit to top 3
                    off = getattr(m, 'offset', 0)
                    ln = getattr(m, 'errorLength', getattr(m, 'length', 5))
                    ctx = self.text[off: off + ln + 10].replace('\n', ' ')
                    fb_lines.append(f" - {m.message} (Context: '...{ctx}...')")
            else:
                fb_lines.append("\n[CRITICAL GRAMMAR ERRORS]: None.")

            if ignored_issues:
                fb_lines.append(f"\n[IGNORED ISSUES] ({len(ignored_issues)} found):")
                for m in ignored_issues[:3]:
                    msg = getattr(m, 'message', 'Issue')
                    off = getattr(m, 'offset', 0)
                    ln = getattr(m, 'errorLength', getattr(m, 'length', 5))
                    ctx = self.text[off: off + ln + 10].replace('\n', ' ')
                    fb_lines.append(f" - {msg} (Context: '...{ctx}...')")

            return s, "\n".join(fb_lines)

        except Exception as e:
            # Grammar backend (Java/LanguageTool) can fail at runtime;
            # degrade to a neutral score rather than crash the request.
            return 5, f"Error during grammar check: {str(e)}"

    def score_vocabulary(self):
        """Score lexical variety (max 10) via type-token ratio (TTR)."""
        distinct_words = len(set(self.words))
        ttr = distinct_words / self.total_words if self.total_words > 0 else 0

        if ttr >= 0.9:
            return 10, f"Excellent variety (TTR: {ttr:.2f})"
        elif ttr >= 0.7:
            return 8, f"Good variety (TTR: {ttr:.2f})"
        elif ttr >= 0.5:
            return 6, f"Average variety (TTR: {ttr:.2f})"
        elif ttr >= 0.3:
            return 4, f"Repetitive (TTR: {ttr:.2f})"
        else:
            return 2, f"Very repetitive (TTR: {ttr:.2f})"

    def score_clarity(self):
        """Score clarity (max 15) from filler-word rate per 100 words.

        FIX: the old implementation compared single tokens against the
        fillers list, so multi-word fillers ("you know", "i mean",
        "sort of") could never match. Single-word fillers are still counted
        per token; multi-word fillers are counted with word-boundary regex
        over the raw text.
        """
        single_fillers = {f for f in RUBRIC["fillers"] if " " not in f}
        multi_fillers = [f for f in RUBRIC["fillers"] if " " in f]

        filler_count = sum(1 for word in self.words if word in single_fillers)

        text_lower = self.text.lower()
        for phrase in multi_fillers:
            filler_count += len(re.findall(r"\b" + re.escape(phrase) + r"\b", text_lower))

        filler_rate = (filler_count / self.total_words) * 100 if self.total_words > 0 else 0

        if filler_rate <= 3:
            return 15, f"Clear speech ({filler_count} fillers)"
        elif filler_rate <= 6:
            return 12, f"Mostly clear ({filler_count} fillers)"
        elif filler_rate <= 9:
            return 9, f"Some hesitation ({filler_count} fillers)"
        elif filler_rate <= 12:
            return 6, f"Hesitant ({filler_count} fillers)"
        else:
            return 3, f"Distracted by fillers ({filler_count} fillers)"

    def score_engagement(self):
        """Score engagement (max 15) from VADER sentiment, capped when no
        enthusiastic vocabulary supports a very high sentiment."""
        vs = sentiment_analyzer.polarity_scores(self.text)

        # Normalize compound score (-1..1) to (0..1).
        prob = (vs['compound'] + 1) / 2

        high_energy_kws = [
            "excited", "thrilled", "passionate", "delighted", "honor",
            "love", "amazing", "wonderful", "fantastic", "energetic",
            "grateful", "confident", "pleasure"
        ]

        has_enthusiasm = any(w in self.text.lower() for w in high_energy_kws)

        # Cap sentiment if it's high but lacks enthusiastic vocabulary.
        if prob >= 0.9 and not has_enthusiasm:
            prob = 0.88

        if prob >= 0.9:
            return 15, f"Very Engaging (Sentiment: {prob:.2f})"
        elif prob >= 0.7:
            return 12, f"Positive (Sentiment: {prob:.2f})"
        elif prob >= 0.5:
            return 9, f"Neutral (Sentiment: {prob:.2f})"
        elif prob >= 0.3:
            return 6, f"Slightly Negative (Sentiment: {prob:.2f})"
        else:
            return 3, f"Negative (Sentiment: {prob:.2f})"

    def calculate_overall_score(self):
        """Run every category scorer and return the total (out of 100) plus
        a per-category breakdown with max points and feedback strings."""
        s_salutation, f_salutation = self.score_salutation()
        s_content, f_content = self.score_content()
        s_flow, f_flow = self.score_flow()
        s_rate, f_rate = self.score_speech_rate()
        s_grammar, f_grammar = self.score_grammar()
        s_vocab, f_vocab = self.score_vocabulary()
        s_clarity, f_clarity = self.score_clarity()
        s_engage, f_engage = self.score_engagement()

        total_score = (
            s_salutation + s_content + s_flow + s_rate +
            s_grammar + s_vocab + s_clarity + s_engage
        )

        return {
            "Total Score": total_score,
            "Breakdown": {
                "Salutation": {"score": s_salutation, "max": 5, "feedback": f_salutation},
                "Content & Structure": {"score": s_content, "max": 30, "feedback": f_content},
                "Flow": {"score": s_flow, "max": 5, "feedback": f_flow},
                "Speech Rate": {"score": s_rate, "max": 10, "feedback": f_rate},
                "Grammar": {"score": s_grammar, "max": 10, "feedback": f_grammar},
                "Vocabulary": {"score": s_vocab, "max": 10, "feedback": f_vocab},
                "Clarity (Fillers)": {"score": s_clarity, "max": 15, "feedback": f_clarity},
                "Engagement": {"score": s_engage, "max": 15, "feedback": f_engage},
            }
        }
src/streamlit_app.py DELETED
@@ -1,40 +0,0 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
- import streamlit as st
5
-
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static/index.html ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Local Intro Scorer</title>
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
    <style>
        body { background-color: #f8f9fa; padding-top: 50px; }
        .card-score { font-size: 2rem; font-weight: bold; color: #0d6efd; }
    </style>
</head>
<body>
    <div class="container">
        <h1 class="text-center mb-4">Introduction Scoring System (Local)</h1>

        <div class="row">
            <div class="col-md-6">
                <div class="card p-4">
                    <div class="mb-3">
                        <label for="transcript" class="form-label">Transcript</label>
                        <textarea class="form-control" id="transcript" rows="10" placeholder="Paste text here..."></textarea>
                    </div>
                    <div class="mb-3">
                        <label for="duration" class="form-label">Duration (seconds)</label>
                        <input type="number" class="form-control" id="duration" value="60">
                    </div>
                    <button onclick="analyze()" class="btn btn-primary w-100" id="btnAnalyze">Analyze</button>
                </div>
            </div>

            <div class="col-md-6">
                <div class="card p-4 h-100">
                    <h3>Results</h3>
                    <div id="loading" class="text-muted" style="display:none;">Processing...</div>
                    <div id="results-area" style="display:none;">
                        <div class="text-center mb-3">
                            <div>Total Score</div>
                            <div class="card-score" id="total-score">0/100</div>
                        </div>
                        <ul class="list-group" id="breakdown-list">
                        </ul>
                    </div>
                </div>
            </div>
        </div>
    </div>

    <script>
    // POST the transcript to /api/score and render the rubric breakdown.
    async function analyze() {
        const transcript = document.getElementById('transcript').value;
        const duration = document.getElementById('duration').value;
        const btn = document.getElementById('btnAnalyze');
        const loading = document.getElementById('loading');
        const resArea = document.getElementById('results-area');

        if(!transcript) return alert("Please enter text");

        btn.disabled = true;
        loading.style.display = 'block';
        resArea.style.display = 'none';

        try {
            const response = await fetch('/api/score', {
                method: 'POST',
                headers: {'Content-Type': 'application/json'},
                body: JSON.stringify({ transcript: transcript, duration: parseInt(duration) })
            });

            // FIX: an HTTP 4xx/5xx used to fall through and render
            // "undefined/100"; surface it through the catch path instead.
            if (!response.ok) {
                throw new Error("Server returned status " + response.status);
            }

            const data = await response.json();

            document.getElementById('total-score').innerText = data['Total Score'] + "/100";
            const list = document.getElementById('breakdown-list');
            list.innerHTML = "";

            for (const [key, val] of Object.entries(data['Breakdown'])) {
                const li = document.createElement('li');
                li.className = "list-group-item";
                li.innerHTML = `<strong>${key}</strong>: ${val.score} pts <br><small class='text-muted'>${val.feedback}</small>`;
                list.appendChild(li);
            }

            resArea.style.display = 'block';
        } catch (error) {
            alert("Error connecting to backend");
            console.error(error);
        } finally {
            btn.disabled = false;
            loading.style.display = 'none';
        }
    }
    </script>
</body>
</html>