import ollama
import re
from config import MODEL_NAME
from langdetect import detect

import pandas as pd 


#client = ollama.Client(timeout=60)

#def augment_question(question: str, lang: str, n: int = 3):
#    

#    system_prompt = (
#        "You generate alternative user questions.\n"
#        "Rules:\n"
#        "- Keep the SAME meaning\n"
#        "- Do NOT answer\n"
#        "- Output ONLY a list\n"
#        "- Each line is ONE question\n"
#        f"- Generate exactly {n} variations\n"
#    )

#    if lang == "ar":
#        system_prompt += "- Use Arabic conversational style\n"

#    prompt = f"{system_prompt}\nOriginal question:\n{question}"

#    response = client.generate(
#        model=MODEL_NAME,
#        prompt=prompt,
#        options={"temperature": 0.3}
#    )

#    text = response["response"]

#    # Clean output 
#    questions = []
#    for line in text.split("\n"):
#        line = re.sub(r"^[\-\*\d\.\)]\s*", "", line).strip()
#        if line and line.lower() != question.lower():
#            questions.append(line)

#    return questions[:n]












# Leading list decoration the model tends to emit: bullets ("-", "*", "•") or
# an item number followed by a delimiter ("1.", "2)", "(3)", "10 -").
# A delimiter is required after the digits so that a question which genuinely
# starts with a number (e.g. "3D printing ...") is left untouched.
_LIST_MARKER_RE = re.compile(r"^\s*(?:[-*•]+|\(?\d+\s*[.):\-])\s*")


def _parse_variations(text: str, question: str, n: int):
    """Turn raw model output into at most ``n`` unique, cleaned questions."""
    # Seed with the original so an echoed copy of it is dropped like a duplicate.
    seen = {question.strip().casefold()}
    variations = []

    for raw_line in text.splitlines():
        candidate = _LIST_MARKER_RE.sub("", raw_line).strip()
        if not candidate:
            continue

        key = candidate.casefold()
        if key in seen:
            continue

        seen.add(key)
        variations.append(candidate)
        if len(variations) == n:
            break

    return variations


def augment_question_smart(question: str, lang: str, n: int = 5):
    """Ask the Ollama model for alternative phrasings of ``question``.

    Builds an instruction prompt, sends it to ``ollama.generate`` using
    ``MODEL_NAME`` at temperature 0.5, and parses the reply line by line.

    Args:
        question: The original question to paraphrase.
        lang: Language code of ``question``. Only ``"ar"`` is special-cased:
            it appends Arabic-specific rules and uses an Arabic label for the
            original question. Every other value gets the English label.
        n: Maximum number of variations to return. Values <= 0 return an
            empty list without calling the model.

    Returns:
        A list of at most ``n`` strings. Leading list markers are stripped,
        blank lines are skipped, and lines equal (case-insensitively) to the
        original question or to an earlier variation are dropped. The model
        may return fewer than ``n`` usable lines.
    """
    if n <= 0:
        return []

    system_prompt = (
        "Generate alternative ways to ask the same question.\n"
        "Rules:\n"
        "- Keep EXACT same meaning\n"
        "- Use different wordings and structures\n"
        "- Include formal and informal versions\n"
        "- Include questions with typos/mistakes users might make\n"
        f"- Generate exactly {n} variations\n"
        "- Output ONLY the questions, one per line\n"
    )

    if lang == "ar":
        system_prompt += """
- استخدم اللهجة المصرية والفصحى
- أضف أخطاء إملائية شائعة
- استخدم صيغ مختلفة (ماذا، كيف، هل، ممكن، عايز)
- حافظ على نفس المعنى
- استخدم كلمات مختلفة تعطى نفس المعنى 
- اخلق طرق بديلة لصياغة نفس السؤال 
- اخلق طرق بديلة لصياغة نفس السؤال باللغة العربية الفصحى واللهجة الدارجة 
"""
        original_label = "السؤال الأصلي:"
    else:
        # Keep the label in the same language as the English instructions so
        # non-Arabic questions are not nudged toward Arabic output.
        original_label = "Original question:"

    prompt = f"{system_prompt}\n\n{original_label}\n{question}"

    response = ollama.generate(
        model=MODEL_NAME,
        prompt=prompt,
        options={"temperature": 0.5},  # higher temp for diversity
    )

    return _parse_variations(response["response"], question, n)




# CSV loader: emits each original Q/A pair plus LLM-generated paraphrases of the question
def load_csv_with_augmentation(path):
    """Load a question/answer CSV and expand each row with paraphrased questions.

    Column names are stripped and lower-cased, then the ``question`` and
    ``answer`` columns are read row by row. For every usable row the original
    pair is emitted first, followed by one entry per variation returned by
    ``augment_question_smart`` (requested with ``n=5``), all sharing the
    row's answer.

    Args:
        path: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        A list of strings formatted as ``"Question: <q>\\nAnswer: <a>"``.

    Raises:
        KeyError: If the CSV has rows but no ``question`` or ``answer`` column.
    """
    # Local import keeps this function self-contained; langdetect is already
    # a dependency of this module (see the top-level ``detect`` import).
    from langdetect.lang_detect_exception import LangDetectException

    df = pd.read_csv(path)
    df.columns = [c.strip().lower() for c in df.columns]

    chunks = []

    for _, row in df.iterrows():
        raw_q = row["question"]
        raw_a = row["answer"]

        # Empty cells are read as NaN, and str(NaN) is the truthy string
        # "nan", so missing values must be rejected before converting.
        if pd.isna(raw_q) or pd.isna(raw_a):
            continue

        q = str(raw_q).strip()
        a = str(raw_a).strip()
        if not q or not a:
            continue

        # Original pair
        chunks.append(f"Question: {q}\nAnswer: {a}")

        # Detect language; langdetect raises when the text has no usable
        # features (e.g. only digits or punctuation). Fall back to a neutral
        # code so one odd row does not abort the whole load.
        try:
            lang = detect(q)
        except LangDetectException:
            lang = "unknown"

        # Augmented pairs reuse the row's answer
        for aug_q in augment_question_smart(q, lang, n=5):
            chunks.append(f"Question: {aug_q}\nAnswer: {a}")

    return chunks