Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import json | |
| import os | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from sentence_transformers import SentenceTransformer | |
| from llama_cpp import Llama | |
| from huggingface_hub import hf_hub_download | |
| # ============================================================================= | |
| # Configuration | |
| # ============================================================================= | |
| GGUF_REPO = "antonisbast/Llama-3.2-3B-Gordon-Ramsay-DPO-GGUF" | |
| GGUF_FILE = "Llama-3.2-3B-Instruct.Q4_K_M.gguf" | |
| CHUNKS_FILE = "chunks.json" | |
| EMBEDDINGS_FILE = "embeddings.npy" | |
| # ============================================================================= | |
| # Load RAG data | |
| # ============================================================================= | |
| print("๐ Loading RAG data...") | |
| with open(CHUNKS_FILE, "r") as f: | |
| chunks = json.load(f) | |
| chunk_embeddings = np.load(EMBEDDINGS_FILE) | |
| print(f" {len(chunks)} chunks, embeddings shape: {chunk_embeddings.shape}") | |
| # ============================================================================= | |
| # Load embedding model (CPU, fast) | |
| # ============================================================================= | |
| print("๐งฎ Loading embedding model...") | |
| embed_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu") | |
| print(" all-MiniLM-L6-v2 ready") | |
| # ============================================================================= | |
| # Load GGUF model (CPU) | |
| # ============================================================================= | |
| print("๐ค Downloading GGUF model...") | |
| model_path = hf_hub_download( | |
| repo_id=GGUF_REPO, | |
| filename=GGUF_FILE, | |
| ) | |
| print(f" Downloaded to {model_path}") | |
| print("๐ค Loading LLM (this takes ~30s)...") | |
| llm = Llama( | |
| model_path=model_path, | |
| n_ctx=2048, | |
| n_threads=4, | |
| n_gpu_layers=0, # CPU only | |
| verbose=False, | |
| ) | |
| print(" LLM ready!") | |
| def generate_text(prompt, max_tokens=256, temperature=0.7): | |
| """Generate text using GGUF model.""" | |
| output = llm( | |
| prompt, | |
| max_tokens=max_tokens, | |
| temperature=temperature, | |
| top_p=0.9, | |
| repeat_penalty=1.1, | |
| stop=["Student Question:", "\n\nQuestion:", "###"], | |
| ) | |
| return output["choices"][0]["text"].strip() | |
| # ============================================================================= | |
| # RAG Pipeline Steps | |
| # ============================================================================= | |
| def paraphrase_query(query): | |
| """Generate 2 paraphrases of the query.""" | |
| paraphrases = [] | |
| for i in range(2): | |
| prompt = f"""Paraphrase the following question. Use different wording while keeping the same meaning. | |
| Return ONLY the paraphrased question, nothing else. | |
| Original: {query} | |
| Paraphrased:""" | |
| result = generate_text(prompt, max_tokens=80, temperature=0.7) | |
| clean = result.split("\n")[0].strip().strip('"').strip("'") | |
| if clean and clean.lower() != query.lower(): | |
| paraphrases.append(clean) | |
| return paraphrases | |
| def retrieve_chunks(queries, top_k=5, threshold=0.3, final_top=3): | |
| """Retrieve relevant chunks for all query variants.""" | |
| all_results = {} | |
| for q in queries: | |
| q_embedding = embed_model.encode([q]) | |
| sims = cosine_similarity(q_embedding, chunk_embeddings)[0] | |
| top_indices = np.argsort(sims)[-top_k:][::-1] | |
| for idx in top_indices: | |
| score = float(sims[idx]) | |
| if score >= threshold: | |
| if idx not in all_results or score > all_results[idx]: | |
| all_results[idx] = score | |
| sorted_results = sorted(all_results.items(), key=lambda x: x[1], reverse=True) | |
| top_results = sorted_results[:final_top] | |
| retrieved = [] | |
| for idx, score in top_results: | |
| retrieved.append({"text": chunks[idx], "score": score, "index": idx}) | |
| return retrieved | |
| def generate_answer(query, context_chunks): | |
| """Generate Gordon Ramsay-style answer from retrieved context.""" | |
| context = "\n\n".join([c["text"] for c in context_chunks]) | |
| prompt = f"""You are Gordon Ramsay, but instead of cooking, you teach Deep Learning. | |
| Answer the student's question using ONLY the provided context. | |
| Rules: | |
| - Be concise (max 3-4 sentences) | |
| - Use cooking metaphors | |
| - Be brutally honest in Gordon Ramsay's style | |
| - Explain the concept correctly based on the context | |
| - Do NOT use emojis | |
| Context: | |
| {context} | |
| Student Question: {query} | |
| Gordon Ramsay:""" | |
| return generate_text(prompt, max_tokens=200, temperature=0.7) | |
| # ============================================================================= | |
| # Main pipeline | |
| # ============================================================================= | |
| def rag_pipeline(query): | |
| """Full RAG pipeline with step-by-step outputs.""" | |
| if not query or not query.strip(): | |
| return "Please enter a question.", "", "" | |
| query = query.strip() | |
| # Step 1: Paraphrase | |
| yield "โณ Generating paraphrases...", "", "" | |
| paraphrases = paraphrase_query(query) | |
| all_queries = [query] + paraphrases | |
| paraphrase_text = f"**Original:** {query}\n\n" | |
| for i, p in enumerate(paraphrases, 1): | |
| paraphrase_text += f"**Paraphrase {i}:** {p}\n\n" | |
| # Step 2: Retrieve | |
| yield paraphrase_text, "โณ Retrieving relevant chunks...", "" | |
| retrieved = retrieve_chunks(all_queries) | |
| if not retrieved: | |
| yield ( | |
| paraphrase_text, | |
| "โ ๏ธ No chunks found above the 0.3 similarity threshold.", | |
| "I couldn't find relevant context to answer your question.", | |
| ) | |
| return | |
| retrieval_text = "" | |
| for i, chunk in enumerate(retrieved, 1): | |
| preview = chunk["text"][:300] + "..." if len(chunk["text"]) > 300 else chunk["text"] | |
| retrieval_text += f"**Chunk {i}** (similarity: {chunk['score']:.3f})\n\n" | |
| retrieval_text += f"```\n{preview}\n```\n\n" | |
| # Step 3: Generate answer | |
| yield paraphrase_text, retrieval_text, "โณ Gordon Ramsay is thinking..." | |
| answer = generate_answer(query, retrieved) | |
| answer_display = f"๐ฅ **Gordon Ramsay says:**\n\n*{answer}*" | |
| yield paraphrase_text, retrieval_text, answer_display | |
| # ============================================================================= | |
| # Gradio UI | |
| # ============================================================================= | |
| EXAMPLES = [ | |
| "Explain dropout and why we use it.", | |
| "Explain backpropagation.", | |
| "Explain the vanishing gradient problem.", | |
| "Explain why transformers use attention.", | |
| "Explain batch normalization.", | |
| "Explain why we use ReLU instead of sigmoid.", | |
| ] | |
| with gr.Blocks( | |
| theme=gr.themes.Soft(), | |
| title="Gordon Ramsay RAG", | |
| css="footer {visibility: hidden}", | |
| ) as demo: | |
| gr.HTML(""" | |
| <div style="text-align: center; margin-bottom: 0.5em;"> | |
| <h1>๐จโ๐ณ Gordon Ramsay RAG โ Deep Learning Tutor</h1> | |
| <p style="color: #666; font-size: 1.1em;"> | |
| Ask a Deep Learning question. Get a textbook-grounded, Ramsay-style answer. | |
| </p> | |
| <p style="color: #999; font-size: 0.9em;"> | |
| โก Running on CPU โ responses take 20-40s. Be patient, unlike Ramsay. | |
| </p> | |
| </div> | |
| """) | |
| with gr.Row(): | |
| with gr.Column(scale=3): | |
| query_input = gr.Textbox( | |
| label="Your Deep Learning Question", | |
| placeholder="e.g., What is dropout and why do we use it?", | |
| lines=2, | |
| ) | |
| with gr.Column(scale=1, min_width=120): | |
| submit_btn = gr.Button("๐ฅ Ask Ramsay!", variant="primary", size="lg") | |
| gr.Examples(examples=EXAMPLES, inputs=query_input, label="Try these:") | |
| gr.HTML("<hr>") | |
| with gr.Accordion("๐ Step 1: Query Paraphrasing", open=True): | |
| paraphrase_output = gr.Markdown() | |
| with gr.Accordion("๐ Step 2: Chunk Retrieval", open=True): | |
| retrieval_output = gr.Markdown() | |
| with gr.Accordion("๐ฅ Step 3: Gordon Ramsay's Answer", open=True): | |
| answer_output = gr.Markdown() | |
| gr.HTML(""" | |
| <div style="text-align: center; padding: 1em; color: #888; font-size: 0.85em;"> | |
| <p> | |
| <b>Model:</b> <a href="https://huggingface.co/antonisbast/Llama-3.2-3B-Gordon-Ramsay-DPO">Llama-3.2-3B-Gordon-Ramsay-DPO</a> | | |
| <b>Dataset:</b> <a href="https://huggingface.co/datasets/antonisbast/gordon-ramsay-dl-instruct">gordon-ramsay-dl-instruct</a> | | |
| <b>Knowledge Base:</b> 807 chunks from Introduction to Deep Learning (Notre Dame, 2025) | |
| </p> | |
| <p>Built for MSc AI & Deep Learning (AIDL_B_CS01) โ University of West Attica</p> | |
| </div> | |
| """) | |
| submit_btn.click( | |
| fn=rag_pipeline, | |
| inputs=[query_input], | |
| outputs=[paraphrase_output, retrieval_output, answer_output], | |
| ) | |
| query_input.submit( | |
| fn=rag_pipeline, | |
| inputs=[query_input], | |
| outputs=[paraphrase_output, retrieval_output, answer_output], | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |