import gradio as gr
import torch
import os
import shutil
import subprocess

# Turn off the Hugging Face tokenizers library's own parallelism. This is set
# before `transformers` is imported further down so the setting is already in
# the environment when that library loads.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Maximize Hugging Face CPU Tier performance: size torch's CPU thread pool to
# the CPU count reported by the OS, falling back to 2 when `os.cpu_count()`
# cannot determine it (returns None).
torch.set_num_threads(os.cpu_count() or 2)

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
from langchain_core.documents import Document

# Maps a lowercase file extension to the langchain `Language` whose splitting
# rules setup_vector_db() applies to that file. Extensions that are not listed
# here are chunked with the generic RecursiveCharacterTextSplitter instead.
# NOTE(review): recent langchain_text_splitters releases appear to ship
# dedicated members for TypeScript and C — confirm against the installed
# version before switching the '.ts', '.c' and '.h' entries.
EXTENSION_TO_LANGUAGE = {
    '.py': Language.PYTHON,
    '.js': Language.JS,
    '.ts': Language.JS,  # TypeScript is split with the JavaScript rules
    '.java': Language.JAVA,
    '.cpp': Language.CPP,
    '.c': Language.CPP,  # C sources reuse the C++ rules
    '.h': Language.CPP,  # headers reuse the C++ rules
    '.go': Language.GO,
    '.rs': Language.RUST,
    '.rb': Language.RUBY,
    '.html': Language.HTML,
    '.md': Language.MARKDOWN,
}
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

# 1. HARDWARE OPTIMIZED LLM LOADING

def load_llm():
    """Load the Qwen2.5-Coder instruct model on CPU and wrap it for LangChain.

    The tokenizer and the float32 model weights are loaded from the same
    checkpoint, combined into a transformers text-generation pipeline that
    returns only newly generated text, and exposed as a
    ``HuggingFacePipeline``.

    Returns:
        HuggingFacePipeline: LLM wrapper generating up to 512 new tokens per
        call with temperature 0.1 and repetition penalty 1.1.
    """
    checkpoint = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
    generation_settings = {
        "max_new_tokens": 512,
        "temperature": 0.1,
        "repetition_penalty": 1.1,
    }

    qwen_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    qwen_model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        device_map="cpu",
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
    )

    # return_full_text=False keeps the prompt out of the generated answer.
    text_generator = pipeline(
        "text-generation",
        model=qwen_model,
        tokenizer=qwen_tokenizer,
        return_full_text=False,
    )
    return HuggingFacePipeline(
        pipeline=text_generator,
        pipeline_kwargs=generation_settings,
    )

# 2. CODE INGESTION & VECTOR DATABASE

def setup_vector_db():
    """Index every readable text file under ``./repo`` into a FAISS store.

    The directory is created when missing. Each file that can be read as
    UTF-8 becomes one ``Document`` (with its path stored under the
    ``"source"`` metadata key), is chunked with language-specific rules when
    its extension appears in ``EXTENSION_TO_LANGUAGE`` (generic rules
    otherwise), and the chunks are embedded with ``all-MiniLM-L6-v2`` on CPU.

    Returns:
        tuple: ``(db, file_count)`` where ``db`` is the FAISS vector store and
        ``file_count`` the number of files that were read. ``(None, 0)`` is
        returned when no file could be read or no chunk was produced.
    """
    if not os.path.exists('./repo'):
        os.makedirs('./repo')

    docs_by_language = {}
    generic_docs = []
    file_count = 0

    for root, dirs, files in os.walk('./repo'):
        # Prune Git metadata by exact directory name. Editing `dirs` in place
        # stops os.walk from descending into it, and unlike a substring test
        # on the path it does not also drop folders such as `.github`.
        dirs[:] = [d for d in dirs if d != '.git']
        for file in files:
            file_path = os.path.join(root, file)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (UnicodeDecodeError, OSError):
                # Best effort: skip binary files as well as entries that
                # cannot be opened (dangling symlinks, permission errors)
                # instead of aborting the whole indexing run.
                continue

            doc = Document(page_content=content, metadata={"source": file_path})
            file_count += 1

            ext = os.path.splitext(file)[1].lower()
            lang = EXTENSION_TO_LANGUAGE.get(ext)
            if lang:
                docs_by_language.setdefault(lang, []).append(doc)
            else:
                generic_docs.append(doc)

    if file_count == 0:
        return None, 0

    all_splits = []

    # Split documents by specific language rules
    for lang, docs in docs_by_language.items():
        try:
            splitter = RecursiveCharacterTextSplitter.from_language(
                language=lang,
                chunk_size=1000,
                chunk_overlap=200
            )
            all_splits.extend(splitter.split_documents(docs))
        except Exception:
            # Deliberate fallback: if the installed langchain version cannot
            # split this language, chunk the files with the generic rules.
            generic_docs.extend(docs)

    # Split generic documents
    if generic_docs:
        generic_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200
        )
        all_splits.extend(generic_splitter.split_documents(generic_docs))

    # Files were read but produced no chunks (e.g. they were all empty).
    if not all_splits:
        return None, 0

    embeddings = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False}
    )
    db = FAISS.from_documents(all_splits, embeddings)

    return db, file_count

# 3. GLOBAL INITIALIZATION
print("Initializing models...")
llm = load_llm()
# Report the device the model is actually placed on rather than whether CUDA
# merely exists on the host: a visible GPU that the model does not use must
# not be advertised as "GPU Active" in the status panel.
_llm_device = getattr(getattr(getattr(llm, "pipeline", None), "model", None), "device", None)
device_status = "🟢 GPU Active" if getattr(_llm_device, "type", None) == "cuda" else "🟡 CPU Mode"
# Index whatever already sits in ./repo at startup; both values are replaced
# later by the ingestion callbacks.
vector_db, file_count = setup_vector_db()

# Prompt used by the QA chain. `{context}` receives the retrieved chunks after
# format_docs() has joined them, and `{input}` receives the user's question
# (both are wired up in build_qa_chain).
prompt_template = """You are an expert Software Engineer and Codebase Assistant. Your ONLY purpose is to answer questions related to the provided codebase or general programming/coding questions. 
If the user asks a question that is NOT related to coding, programming, or the provided codebase, you must politely refuse to answer and remind them that you are a code-focused assistant.

When answering:
1. Carefully analyze the provided context.
2. Provide a clear, step-by-step explanation.
3. If providing code, use markdown code blocks.
4. If the answer cannot be found in the context, explicitly state that you don't know rather than hallucinating.

Codebase Context:
{context}

Question: {input}
Expert Developer Answer:"""

# Compiled template; its input variables are taken from the placeholders above.
prompt = PromptTemplate.from_template(prompt_template)

def format_docs(docs):
    """Concatenate the text of *docs* into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        str: The contents in order, separated by one blank line; an empty
        string when *docs* is empty.
    """
    chunk_texts = [chunk.page_content for chunk in docs]
    separator = "\n\n"
    return separator.join(chunk_texts)

def build_qa_chain(db):
    """Assemble the retrieval-augmented QA chain for a vector store.

    Args:
        db: Vector store to retrieve from, or a falsy value when nothing has
            been indexed.

    Returns:
        A runnable that takes the question string and yields a dict with the
        retrieved documents under ``"context"``, the question under
        ``"input"`` and the generated text under ``"answer"``; ``None`` when
        *db* is falsy.
    """
    if not db:
        return None

    # Fetch the five closest chunks for every question.
    retriever = db.as_retriever(search_kwargs={"k": 5})

    def _context_as_text(state):
        # The prompt needs plain text, not Document objects.
        return format_docs(state["context"])

    answer_step = (
        RunnablePassthrough.assign(context=_context_as_text)
        | prompt
        | llm
        | StrOutputParser()
    )

    # The raw documents stay under "context" in the final output because the
    # text conversion above only happens inside the answer branch.
    retrieval_step = {"context": retriever, "input": RunnablePassthrough()}
    return retrieval_step | RunnablePassthrough.assign(answer=answer_step)

# Chain for whatever was indexed at startup; build_qa_chain returns None when
# the startup index is empty. Rebuilt by the ingestion callbacks below.
qa_chain = build_qa_chain(vector_db)

# 4. INGESTION FUNCTIONS
def clone_and_index(repo_url):
    """Clone a Git repository into ``./repo`` and rebuild the index from it.

    Any existing ``./repo`` directory is removed first. On a successful clone
    the module-level ``vector_db``, ``file_count`` and ``qa_chain`` are
    replaced; when the clone fails they are left untouched.

    Args:
        repo_url: Repository location typed by the user. Surrounding
            whitespace is ignored when cloning.

    Returns:
        str: Markdown status message describing success, an empty index, a
        failed clone, or a missing URL.
    """
    global vector_db, file_count, qa_chain
    if not repo_url or not repo_url.strip():
        return "⚠️ Please enter a valid GitHub URL."

    clone_source = repo_url.strip()

    if os.path.exists('./repo'):
        shutil.rmtree('./repo')

    try:
        subprocess.run(
            # "--" ends option parsing, so user input that starts with "-" is
            # treated as a repository location and never as a git option.
            ["git", "clone", "--", clone_source, "./repo"],
            check=True,
            capture_output=True,
            text=True,
            # Fail fast instead of waiting on a terminal credential prompt
            # (e.g. for private or mistyped repositories).
            env={**os.environ, "GIT_TERMINAL_PROMPT": "0"},
        )
    except subprocess.CalledProcessError as e:
        return f"❌ Failed to clone repo. Error: {e.stderr}"
    except Exception as e:
        # Covers failures to start git at all, e.g. git not being installed.
        return f"❌ Failed to clone repo: {str(e)}"

    vector_db, file_count = setup_vector_db()
    qa_chain = build_qa_chain(vector_db)

    if vector_db:
        return f"✅ Success! {file_count} files indexed from `{repo_url}`"
    else:
        return f"⚠️ Warning: No valid text files found in `{repo_url}`"

def upload_and_index(files):
    """Copy uploaded files into a fresh ``./repo`` and rebuild the index.

    Any existing ``./repo`` directory is removed first. The module-level
    ``vector_db``, ``file_count`` and ``qa_chain`` are replaced afterwards.

    Args:
        files: Uploaded files as delivered by the ``gr.File`` component; each
            item is either a path string or an object whose ``name``
            attribute holds the path.

    Returns:
        str: Markdown status message describing success, an empty index, or a
        missing upload.
    """
    global vector_db, file_count, qa_chain
    if not files:
        return "⚠️ No files were uploaded."

    if os.path.exists('./repo'):
        shutil.rmtree('./repo')
    os.makedirs('./repo', exist_ok=True)

    for file in files:
        # Accept both a plain filepath string and a file-like wrapper that
        # exposes the path as `.name` (which one arrives presumably depends
        # on the Gradio version / component settings).
        file_path = getattr(file, "name", str(file))
        stem, ext = os.path.splitext(os.path.basename(file_path))
        dest_path = os.path.join('./repo', stem + ext)
        # Uploads are flattened into one directory, so two files sharing a
        # basename would silently overwrite each other; give later ones a
        # numeric suffix instead.
        duplicate = 1
        while os.path.exists(dest_path):
            dest_path = os.path.join('./repo', f"{stem}_{duplicate}{ext}")
            duplicate += 1
        shutil.copy(file_path, dest_path)

    vector_db, file_count = setup_vector_db()
    qa_chain = build_qa_chain(vector_db)

    if vector_db:
        return f"✅ Success! {file_count} files indexed from local upload"
    else:
        return "⚠️ Warning: No valid text files found in the uploaded files"

# 5. CHAT LOGIC
# Markdown fence tags for the snippets shown under an answer, keyed by
# lowercase file extension. Unlisted extensions get an untagged fence.
_FENCE_LANGUAGE_BY_EXT = {
    '.py': 'python',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.java': 'java',
    '.cpp': 'cpp',
    '.c': 'c',
    '.h': 'c',
    '.go': 'go',
    '.rs': 'rust',
    '.rb': 'ruby',
    '.html': 'html',
    '.md': 'markdown',
}


def _format_sources(sources):
    """Render retrieved chunks as a collapsible Markdown ``<details>`` block."""
    parts = ["\n\n<details><summary>🔍 View Source Code Referenced</summary>\n\n"]
    for idx, doc in enumerate(sources, start=1):
        source_file = doc.metadata.get("source", "Unknown File")
        ext = os.path.splitext(source_file)[1].lower()
        fence_lang = _FENCE_LANGUAGE_BY_EXT.get(ext, "")

        # The fence must be longer than any backtick run inside the snippet,
        # otherwise a chunk that itself contains ``` (common in Markdown
        # files) would close the block early and break the rendering.
        longest_run = current_run = 0
        for ch in doc.page_content:
            current_run = current_run + 1 if ch == "`" else 0
            longest_run = max(longest_run, current_run)
        fence = "`" * max(3, longest_run + 1)

        parts.append(f"**Snippet {idx}** from `{source_file}`:\n")
        parts.append(f"{fence}{fence_lang}\n{doc.page_content}\n{fence}\n\n")
    parts.append("</details>")
    return "".join(parts)


def respond(message, chat_history):
    """Answer one chat message and append the exchange to the history.

    Args:
        message: Text the user submitted.
        chat_history: List of ``{"role", "content"}`` dicts shown in the
            chatbot; it is extended in place. ``None`` is treated as empty.

    Returns:
        tuple: ``("", chat_history)`` — the empty string clears the input
        box. A blank message leaves the history unchanged.
    """
    if not message.strip():
        return "", chat_history

    if chat_history is None:
        chat_history = []

    if not vector_db:
        bot_message = "👋 Welcome! Please provide a repo link or upload your code files using the panel on the left to start chatting."
    else:
        try:
            # Fetch response from RAG
            response = qa_chain.invoke(message)
            bot_message = response["answer"]
            sources = response["context"]
            if sources:
                bot_message += _format_sources(sources)
        except Exception as e:
            # UI boundary: surface the failure in the chat instead of letting
            # the callback crash.
            bot_message = f"❌ An error occurred during processing: {str(e)}"

    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": bot_message})
    return "", chat_history

# 6. GRADIO UI
# Styles for the elements tagged with the "status-box" and "instructions"
# classes in the UI below, each with a `.dark` variant. Passed to
# demo.launch() at the bottom of the file.
custom_css = """
.status-box { padding: 15px; border-radius: 8px; background-color: #f0f0f0; margin-bottom: 20px; border-left: 4px solid #007bff;}
.dark .status-box { background-color: #1e293b; color: #cbd5e1; border-left: 4px solid #3b82f6;}
.instructions { font-size: 0.95em; color: #555; }
.dark .instructions { color: #aaa; }
"""

def get_initial_repo_status():
    """Describe the startup state of the index for the status panel.

    Returns:
        str: Markdown reporting the indexed file count when the module-level
        ``vector_db`` is set, otherwise a prompt to provide a codebase.
    """
    if not vector_db:
        return "❌ **Empty Database.** Provide a codebase below to begin."
    return f"✅ **Ready!** {file_count} files indexed and loaded."

# Page layout: a narrow left column (scale=1) with the status panel and the
# ingestion controls, and a wide right column (scale=3) with the chat.
with gr.Blocks(title="Codebase Assistant") as demo:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("# 🦖 RepoRaptor\n**Your personal AI codebase expert.**")
            gr.Markdown("---")

            # Status panel. `device_status` is fixed at startup; `repo_status`
            # is the output target of both ingestion buttons below.
            with gr.Column(elem_classes=["status-box"]):
                gr.Markdown("### 📊 System Status")
                gr.Markdown(f"**Hardware:** {device_status}")
                repo_status = gr.Markdown(get_initial_repo_status())

            gr.Markdown("### 📂 Ingest Codebase")
            gr.Markdown("Choose a method to load your codebase into the Vector Database.", elem_classes=["instructions"])

            # Two alternative ingestion paths, one per tab.
            with gr.Tabs():
                with gr.Tab("GitHub Repo"):
                    gr.Markdown("Clone a public repository directly:")
                    repo_url = gr.Textbox(placeholder="https://github.com/user/repo", show_label=False)
                    clone_btn = gr.Button("⬇️ Clone & Index", variant="primary")
                with gr.Tab("Local Upload"):
                    gr.Markdown("Upload local codebase files:")
                    local_files = gr.File(file_count="multiple", label="Upload Files")
                    upload_btn = gr.Button("📤 Upload & Index", variant="primary")

            # Each callback returns a status string that replaces the text of
            # `repo_status`.
            clone_btn.click(fn=clone_and_index, inputs=[repo_url], outputs=[repo_status])
            upload_btn.click(fn=upload_and_index, inputs=[local_files], outputs=[repo_status])

        with gr.Column(scale=3):
            gr.Markdown("### 💻 Chat Interface\nAsk architecture questions, find bugs, or request code explanations. I will **only** answer questions related to code.")
            chatbot = gr.Chatbot(height=600, show_label=False)

            with gr.Row():
                msg = gr.Textbox(placeholder="E.g., What does the main function do? (Press Enter to send)", show_label=False, scale=4)
                clear = gr.Button("🗑️ Clear Chat", scale=1)

            # respond() returns ("", history): the first value empties the
            # textbox, the second refreshes the chatbot.
            msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
            # Reset both the textbox and the conversation.
            clear.click(lambda: ("", []), inputs=None, outputs=[msg, chatbot], queue=False)

# Start the web server only when run as a script. Binding to 0.0.0.0 listens
# on all network interfaces rather than only on localhost.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", css=custom_css, theme=gr.themes.Soft())