# EsTu / app.py — Spanish tutor Hugging Face Space (commit 1fbc1f9, by kambris)
import json
import os
from collections import defaultdict
from datetime import datetime

import gradio as gr
import requests
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
MODEL = "meta-llama/Llama-3.3-70B-Instruct"
class LearningTracker:
    """Accumulates per-session learning state for the tutor.

    Tracks unique vocabulary and grammar points, counts of recurring error
    patterns, a spaced-repetition review queue, and the session start time.
    """

    def __init__(self):
        self.vocabulary = set()                 # unique words introduced so far
        self.grammar_points = set()             # unique grammar topics covered
        self.error_patterns = defaultdict(int)  # error label -> occurrence count
        self.review_items = []                  # items queued for spaced repetition
        self.session_start = datetime.now()     # anchor for session duration

    def add_vocabulary(self, words):
        """Add an iterable of words to the vocabulary set (duplicates ignored)."""
        self.vocabulary.update(words)

    def add_grammar(self, point):
        """Record a grammar point (duplicates ignored)."""
        self.grammar_points.add(point)

    def log_error(self, error_type):
        """Increment the counter for one error pattern, e.g. 'ser/estar'."""
        self.error_patterns[error_type] += 1

    def get_stats(self):
        """Return a snapshot dict of session statistics.

        Bug fix: the original used `timedelta.seconds`, which is only the
        seconds *component* of the delta and wraps back to 0 after 24 hours;
        `total_seconds()` gives the true elapsed duration.
        """
        elapsed = datetime.now() - self.session_start
        return {
            'vocabulary_count': len(self.vocabulary),
            'grammar_points': len(self.grammar_points),
            'errors': dict(self.error_patterns),
            'review_queue': len(self.review_items),
            'session_minutes': int(elapsed.total_seconds() // 60),
        }
tracker = LearningTracker()
def build_system_prompt(stats):
    """Build the tutor system prompt from a LearningTracker.get_stats() dict.

    Injects two optional context sections: the learner's top error patterns
    (so the model reinforces them) and a summary of learning progress (so the
    model can reference earlier material). The prompt also instructs the model
    to emit [VOCAB:]/[ERROR:] markers that extract_learning_data() parses.
    """
    error_focus = ""
    if stats['errors']:
        # Surface only the 3 most frequent error patterns, most common first.
        top_errors = sorted(stats['errors'].items(), key=lambda x: x[1], reverse=True)[:3]
        error_list = ", ".join([f"{e[0]} ({e[1]}x)" for e in top_errors])
        error_focus = f"\n\nCOMMON ERRORS TO ADDRESS: {error_list}\nGently reinforce these areas in conversation."
    memory_context = ""
    if stats['vocabulary_count'] > 0:
        # Only add progress context once at least one word has been tracked.
        memory_context = f"\n\nLEARNING PROGRESS:\n- Vocabulary introduced: {stats['vocabulary_count']} words\n- Grammar points covered: {stats['grammar_points']}\n- Items ready for review: {stats['review_queue']}\nReference previous topics naturally in conversation."
    return f"""You're tutoring a linguistics PhD student learning Spanish (A2-B1 level). They're fluent in English, French, and Arabic.
Use B1-level Spanish for all conversation. Adapt to whatever they need - conversation practice, grammar questions, vocabulary building, or scenario practice.
Balance accessible comparisons with metalinguistic insight:
- Simple: "Spanish 'he comido' = French 'j'ai mangé' = English 'I have eaten'"
- Analytical: "Notice the pro-drop here - Spanish allows null subjects unlike French"
- Pattern recognition: "The subjunctive after 'querer que' works like French 'vouloir que' + subjonctif"
For grammar questions, give clear side-by-side comparisons with linguistic depth:
- "Spanish: 'Yo como' / French: 'Je mange' / English: 'I eat' / Arabic: 'آكل' (subject optional in Spanish/Arabic)"
- Discuss: "Both Spanish and Arabic are pro-drop languages, unlike French and English"
- Show conjugation patterns across languages, note morphological strategies
For vocabulary, start with practical comparisons, layer in analysis:
- Spanish: "almohada" / Arabic: "المخدة" (al-mikhadda) → discuss Arabic substrate
- Spanish: "importante" / French: "important" / English: "important" → Latin cognates
- Note each new word you introduce by marking it: [VOCAB: word]
For errors, gently recast and mark the pattern: [ERROR: ser/estar] or [ERROR: subjunctive] or [ERROR: preterite/imperfect]
When you see repeated error patterns, address them directly but kindly.{error_focus}{memory_context}
SPACED REPETITION: Every 5-7 messages, naturally weave in a review question about vocabulary or grammar from earlier in the conversation."""
def extract_learning_data(text):
    """Parse [VOCAB: ...], [GRAMMAR: ...] and [ERROR: ...] tags from a reply.

    Returns a (vocab, grammar, errors) tuple of lists of stripped tag values,
    in order of appearance.
    """
    def tagged_values(marker):
        # Mirrors the original split-based parsing: take everything between
        # the marker and the next ']' (or the rest of the text if ']' is
        # missing), whitespace-stripped.
        if marker not in text:
            return []
        return [chunk.split(']')[0].strip() for chunk in text.split(marker)[1:]]

    return (
        tagged_values('[VOCAB:'),
        tagged_values('[GRAMMAR:'),
        tagged_values('[ERROR:'),
    )
def get_progress_display():
    """Render the Markdown progress panel shown beside the chat.

    Reads the module-level `tracker`; appends an error-pattern section
    (most frequent first) only when errors have been logged.
    """
    stats = tracker.get_stats()
    panel = (
        "### 📊 Your Progress\n"
        "**Session Stats:**\n"
        f"- ⏱️ Time: {stats['session_minutes']} minutes\n"
        f"- 📚 Vocabulary: {stats['vocabulary_count']} words\n"
        f"- 📖 Grammar points: {stats['grammar_points']}\n"
        f"- 🔄 Review queue: {stats['review_queue']} items\n"
    )
    error_counts = stats['errors']
    if error_counts:
        panel += "\n**Error Patterns (focus areas):**\n"
        ranked = sorted(error_counts.items(), key=lambda item: item[1], reverse=True)
        for label, count in ranked:
            panel += f"- {label}: {count}x\n"
    return panel
def query_model(messages, stream=True):
    """POST a chat-completion request to the HF inference router.

    Returns the raw `requests.Response`; with stream=True the response body
    is left unconsumed so the caller can iterate SSE lines.
    """
    endpoint = "https://router.huggingface.co/v1/chat/completions"
    body = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": 1000,
        "temperature": 0.7,
        "stream": stream,
    }
    auth_headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    return requests.post(endpoint, headers=auth_headers, json=body, timeout=120, stream=stream)
def request_review(history):
    """Queue a spaced-repetition request as the next user turn.

    Returns a new history list with the review message appended; the bot
    reply slot is left as None for bot_respond to fill in.
    """
    chat = [] if history is None else history
    stats = tracker.get_stats()
    nothing_to_review = stats['review_queue'] == 0 and stats['vocabulary_count'] == 0
    if nothing_to_review:
        prompt = "No hay nada que revisar todavía. (Nothing to review yet. Keep conversing!)"
    else:
        prompt = "Dame un repaso de lo que hemos aprendido. (Give me a review of what we've learned.)"
    return chat + [[prompt, None]]
with gr.Blocks() as demo:
    gr.Markdown("# 🇪🇸 Spanish Tutor - Advanced Learning System")
    gr.Markdown("*Powered by Llama 3.3 70B with memory, spaced repetition, and progress tracking*")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(label="Message", placeholder="Type in Spanish, English, French, or Arabic...")
            with gr.Row():
                send = gr.Button("Send", variant="primary")
                review = gr.Button("📝 Request Review", variant="secondary")
                clear = gr.Button("Clear")
        with gr.Column(scale=1):
            progress_display = gr.Markdown(get_progress_display())
            gr.Markdown("""
### 💡 Tips
**The tutor tracks:**
- New vocabulary you learn
- Grammar points covered
- Your common error patterns
- Items for spaced repetition
**Error patterns help focus practice on:**
- ser/estar confusion
- subjunctive usage
- preterite/imperfect
- gender agreement
Click **Request Review** for spaced repetition practice.
""")

    def user_submit(user_message, history):
        """Clear the textbox and append the user turn with an empty bot slot."""
        return "", history + [[user_message, None]]

    def bot_respond(history):
        """Stream the model reply into the last history slot, then harvest
        [VOCAB:]/[GRAMMAR:]/[ERROR:] tags into the global tracker.

        A generator: yields (history, progress_markdown) pairs so Gradio
        updates the chat and the sidebar as tokens arrive.

        Fixes vs. original:
        - `import json` was executed inside the token-streaming loop; it is
          now a module-level import (see file imports).
        - the bare `except: continue` is narrowed to the exceptions a
          malformed SSE chunk can actually raise.
        - a final yield after tracking so the progress panel reflects the
          just-learned data instead of staying stale until the next turn.
        - the empty-history path used `return value` inside a generator,
          which delivers nothing to Gradio; it now yields first.
        """
        if not history:
            yield history, get_progress_display()
            return
        user_message = history[-1][0]
        stats = tracker.get_stats()
        messages = [{"role": "system", "content": build_system_prompt(stats)}]
        # Replay prior completed turns (excluding the pending one) as
        # alternating user/assistant roles.
        for user_msg, assistant_msg in history[:-1]:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": user_message})
        try:
            response_obj = query_model(messages, stream=True)
            if response_obj.status_code == 200:
                full_response = ""
                for raw_line in response_obj.iter_lines():
                    if not raw_line:
                        continue
                    line = raw_line.decode('utf-8')
                    # Only SSE data lines carry completion chunks.
                    if not line.startswith('data: '):
                        continue
                    line = line[6:]
                    if line.strip() == '[DONE]':
                        break
                    try:
                        chunk = json.loads(line)
                        choices = chunk.get('choices') or []
                        if choices:
                            content = choices[0].get('delta', {}).get('content', '')
                            if content:
                                full_response += content
                                history[-1][1] = full_response
                                yield history, get_progress_display()
                    except (json.JSONDecodeError, KeyError, IndexError, TypeError):
                        # Skip malformed chunks; keep streaming the rest.
                        continue
                vocab, grammar, errors = extract_learning_data(full_response)
                tracker.add_vocabulary(vocab)
                for g in grammar:
                    tracker.add_grammar(g)
                for e in errors:
                    tracker.log_error(e)
                # Final update so the panel shows the just-tracked stats.
                yield history, get_progress_display()
            else:
                history[-1][1] = f"Error {response_obj.status_code}: {response_obj.text}"
                yield history, get_progress_display()
        except Exception as e:
            history[-1][1] = f"Error: {str(e)}"
            yield history, get_progress_display()

    msg.submit(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_respond, chatbot, [chatbot, progress_display]
    )
    send.click(user_submit, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_respond, chatbot, [chatbot, progress_display]
    )
    review.click(request_review, chatbot, chatbot, queue=False).then(
        bot_respond, chatbot, [chatbot, progress_display]
    )
    clear.click(lambda: [], None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch()