BeastGokul commited on
Commit
b563200
·
verified ·
1 Parent(s): 8626f14

Update llm.py

Browse files
Files changed (1) hide show
  1. llm.py +64 -64
llm.py CHANGED
@@ -1,64 +1,64 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM
2
- from user_data import load_user_data, save_user_data
3
- from phonetics import analyze_audio_phonetically, extract_phonemes
4
-
5
- model_name = "BeastGokul/Nika-1.5B"
6
- llm_tokenizer = AutoTokenizer.from_pretrained(model_name)
7
- llm_model = AutoModelForCausalLM.from_pretrained(model_name)
8
-
9
- SYSTEM_PROMPT = """You are a specialized pronunciation assistant for non-native English speakers.\nYour job is to provide targeted, actionable feedback based on the user's speech or description.\n\nWhen analyzing pronunciation:\n1. Identify at most 2 specific phonemes or pronunciation patterns that need improvement\n2. Explain how the sound is correctly formed (tongue position, lip movement, etc.)\n3. Suggest one simple, targeted exercise for practice\n4. Be encouraging and note any improvements from previous sessions\n5. Use simple language appropriate for language learners\n\nWhen provided with phonetic analysis data, incorporate this information into your feedback.\n"""
10
-
11
- def get_llm_feedback(audio=None, text=None, reference_text=None, user_id="default", transcribe_func=None):
12
- user_data = load_user_data(user_id)
13
- # Process audio if provided
14
- if audio:
15
- from user_data import save_audio
16
- audio_path = save_audio(audio, user_id)
17
- # Transcribe if no text was provided
18
- if not text and transcribe_func:
19
- text = transcribe_func(audio_path)
20
- # Get phonetic analysis
21
- phonetic_analysis = analyze_audio_phonetically(audio_path, reference_text)
22
- phonetic_info = f"""
23
- Phonetic analysis:\n- Detected phonemes: {phonetic_analysis['detected_phonemes']}\n"""
24
- if reference_text:
25
- phonetic_info += f"- Reference phonemes: {phonetic_analysis.get('reference_phonemes', 'N/A')}\n"
26
- else:
27
- audio_path = None
28
- phonetic_info = ""
29
- # Get user history context
30
- history_context = ""
31
- if user_data["practice_sessions"]:
32
- phoneme_counts = {p: data["practice_count"] for p, data in user_data["phoneme_progress"].items()}
33
- challenging = sorted(phoneme_counts.items(), key=lambda x: x[1], reverse=True)[:3]
34
- history_context = f"""
35
- User has practiced {len(user_data['practice_sessions'])} times before.\nCommon challenging phonemes: {', '.join([p for p, _ in challenging])}.\n"""
36
- # Build prompt for LLM
37
- if text:
38
- user_input = f"I said: '{text}'"
39
- if reference_text and reference_text != text:
40
- user_input += f". I was trying to say: '{reference_text}'"
41
- else:
42
- user_input = "Please analyze my pronunciation."
43
- full_prompt = f"""{SYSTEM_PROMPT}\n\nUser history:\n{history_context}\n\n{phonetic_info}\n\nUser: {user_input}\n"""
44
- # Get LLM response
45
- inputs = llm_tokenizer(full_prompt, return_tensors="pt").to(llm_model.device)
46
- import torch
47
- with torch.no_grad():
48
- outputs = llm_model.generate(
49
- **inputs,
50
- max_new_tokens=200,
51
- temperature=0.7,
52
- top_p=0.9,
53
- do_sample=True
54
- )
55
- response = llm_tokenizer.decode(outputs[0], skip_special_tokens=True)
56
- try:
57
- response = response.split("Assistant: ")[-1].strip()
58
- except:
59
- pass
60
- # Track the session if audio was provided
61
- if audio_path:
62
- from user_data import track_practice_session
63
- track_practice_session(user_id, audio_path, text, reference_text, response)
64
- return response, text
 
1
from transformers import AutoTokenizer, AutoModelForCausalLM
# NOTE(review): this import was commented out in commit b563200, but
# load_user_data is still called inside get_llm_feedback, so the function
# raised NameError on every call. Restored.
from user_data import load_user_data, save_user_data
from phonetics import analyze_audio_phonetically, extract_phonemes

# Model checkpoint used for feedback generation; loaded once at import time
# so repeated calls to get_llm_feedback reuse the same weights.
model_name = "BeastGokul/Nika-1.5B"
llm_tokenizer = AutoTokenizer.from_pretrained(model_name)
llm_model = AutoModelForCausalLM.from_pretrained(model_name)

# System prompt prepended to every request; defines the assistant's role
# and the structure of the pronunciation feedback it should produce.
SYSTEM_PROMPT = """You are a specialized pronunciation assistant for non-native English speakers.\nYour job is to provide targeted, actionable feedback based on the user's speech or description.\n\nWhen analyzing pronunciation:\n1. Identify at most 2 specific phonemes or pronunciation patterns that need improvement\n2. Explain how the sound is correctly formed (tongue position, lip movement, etc.)\n3. Suggest one simple, targeted exercise for practice\n4. Be encouraging and note any improvements from previous sessions\n5. Use simple language appropriate for language learners\n\nWhen provided with phonetic analysis data, incorporate this information into your feedback.\n"""
11
def get_llm_feedback(audio=None, text=None, reference_text=None, user_id="default", transcribe_func=None):
    """Generate pronunciation feedback for a user's speech or text.

    Builds a prompt from the system instructions, the user's practice
    history, and (when audio is given) a phonetic analysis, then asks the
    causal LM for feedback.

    Args:
        audio: Raw audio input as accepted by ``user_data.save_audio``
            (presumably a Gradio-style audio payload — TODO confirm).
        text: What the user said; transcribed from ``audio`` via
            ``transcribe_func`` when omitted.
        reference_text: The target phrase the user was trying to say.
        user_id: Key for loading/saving per-user practice data.
        transcribe_func: Optional callable ``audio_path -> text``.

    Returns:
        Tuple ``(response, text)``: the model's feedback string and the
        (possibly transcribed) user text.
    """
    # Imported locally: the module-level user_data import was commented out
    # in this commit, but this function still needs load_user_data. The
    # local import keeps the function self-contained either way.
    from user_data import load_user_data
    import torch

    user_data = load_user_data(user_id)

    # Process audio if provided: persist it, transcribe if needed, and run
    # the phonetic analysis that feeds the prompt.
    if audio:
        from user_data import save_audio
        audio_path = save_audio(audio, user_id)
        # Transcribe only when the caller gave no text and a transcriber exists.
        if not text and transcribe_func:
            text = transcribe_func(audio_path)
        phonetic_analysis = analyze_audio_phonetically(audio_path, reference_text)
        phonetic_info = f"""
Phonetic analysis:\n- Detected phonemes: {phonetic_analysis['detected_phonemes']}\n"""
        if reference_text:
            phonetic_info += f"- Reference phonemes: {phonetic_analysis.get('reference_phonemes', 'N/A')}\n"
    else:
        audio_path = None
        phonetic_info = ""

    # Summarize the user's history: session count plus their three most
    # practiced (i.e. most challenging) phonemes.
    history_context = ""
    if user_data["practice_sessions"]:
        phoneme_counts = {p: data["practice_count"] for p, data in user_data["phoneme_progress"].items()}
        challenging = sorted(phoneme_counts.items(), key=lambda x: x[1], reverse=True)[:3]
        history_context = f"""
User has practiced {len(user_data['practice_sessions'])} times before.\nCommon challenging phonemes: {', '.join([p for p, _ in challenging])}.\n"""

    # Build the user-facing portion of the prompt.
    if text:
        user_input = f"I said: '{text}'"
        if reference_text and reference_text != text:
            user_input += f". I was trying to say: '{reference_text}'"
    else:
        user_input = "Please analyze my pronunciation."
    full_prompt = f"""{SYSTEM_PROMPT}\n\nUser history:\n{history_context}\n\n{phonetic_info}\n\nUser: {user_input}\n"""

    # Generate the feedback. Sampling (temperature/top_p) keeps responses
    # varied across sessions; max_new_tokens bounds latency.
    inputs = llm_tokenizer(full_prompt, return_tensors="pt").to(llm_model.device)
    with torch.no_grad():
        outputs = llm_model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )
    response = llm_tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text after the last "Assistant: " marker, if present.
    # (str.split cannot raise here, so the original bare try/except was dead
    # code and has been removed.)
    response = response.split("Assistant: ")[-1].strip()

    # Track the session only when audio was actually saved.
    if audio_path:
        from user_data import track_practice_session
        track_practice_session(user_id, audio_path, text, reference_text, response)
    return response, text