Spaces:
Runtime error
Runtime error
| """ | |
| OpenKB + OpenRouter (Llama 3.3-70B) — Marktechpost Tutorial | |
| Hugging Face Space entry point | |
| """ | |
| import os | |
| import sys | |
| import subprocess | |
| import shutil | |
| import textwrap | |
| import re | |
| import time | |
| from pathlib import Path | |
| from collections import Counter | |
| import gradio as gr | |
| # ── helpers ────────────────────────────────────────────────────────────────── | |
# Root of the throw-away knowledge base; the wiki and raw-document folders live beneath it.
KB_DIR = Path("/tmp/my_knowledge_base")
wiki_dir = KB_DIR.joinpath("wiki")
raw_dir = KB_DIR.joinpath("raw")

# Model identifier written into the OpenKB config file.
LLM_MODEL = "openrouter/meta-llama/llama-3.3-70b-instruct:free"
def run_cmd(cmd: str, cwd=None) -> tuple[str, str]:
    """Run *cmd* through the shell and return its ``(stdout, stderr)``.

    Both streams are captured as text and stripped of surrounding
    whitespace. The exit status is not inspected or returned.

    Args:
        cmd: Full shell command line.
        cwd: Working directory for the child process, or ``None`` to inherit.
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        cwd=cwd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    out, err = (stream.strip() for stream in (completed.stdout, completed.stderr))
    return out, err
def kb_cmd(command: str) -> str:
    """Run ``openkb <command>`` with KB_DIR as the working directory.

    Returns the captured stdout, or stderr when stdout is empty.
    """
    out, err = run_cmd("openkb " + command, cwd=str(KB_DIR))
    if out:
        return out
    return err
def section_header(title: str) -> str:
    """Return *title* framed in a three-line box, with a blank line before it.

    The horizontal borders are ``len(title) + 4`` characters wide, so the
    title row is padded with two spaces on each side to keep the right-hand
    edge of the box aligned with the corners.

    Args:
        title: Text to place inside the box.

    Returns:
        The boxed title, starting and ending with a newline.
    """
    bar = "─" * (len(title) + 4)
    # Two spaces of padding per side == the 4 extra border characters above.
    return f"\n┌{bar}┐\n│  {title}  │\n└{bar}┘\n"
def show_tree(root: Path, indent=0, max_depth=3) -> list[str]:
    """Render *root* and its descendants as a list of indented lines.

    Each level is indented by one extra space, non-root entries are
    prefixed with ``└─ ``, and directories get a trailing ``/``. Children
    are visited in sorted order. Entries deeper than *max_depth* are omitted.
    """
    if indent > max_depth:
        return []

    is_dir = root.is_dir()
    marker = "└─ " if indent else ""
    suffix = "/" if is_dir else ""
    rendered = [" " * indent + marker + root.name + suffix]

    if is_dir:
        for entry in sorted(root.iterdir()):
            rendered += show_tree(entry, indent + 1, max_depth)
    return rendered
| # ── document corpus ────────────────────────────────────────────────────────── | |
# Each source document is kept in its own private constant so the DOCS
# mapping below stays a short filename -> Markdown text table.
_TRANSFORMER_DOC = textwrap.dedent("""\
    # Transformer Architecture
    ## Overview
    The Transformer is a deep learning architecture introduced in "Attention Is All
    You Need" (Vaswani et al., 2017). It replaced recurrent networks with a
    self-attention mechanism, enabling parallel training and better long-range
    dependency modelling.
    ## Key Components
    - **Multi-Head Self-Attention**: Computes attention in h parallel heads, each
    with its own learned Q/K/V projections, then concatenates and projects.
    - **Feed-Forward Network (FFN)**: Two linear layers with a ReLU activation,
    applied position-wise.
    - **Positional Encoding**: Sinusoidal or learned embeddings that inject
    sequence-order information, since attention is permutation-invariant.
    - **Layer Normalisation**: Applied before (Pre-LN) or after (Post-LN) each
    sub-layer, stabilising gradients.
    - **Residual Connections**: Added around each sub-layer to ease gradient flow.
    ## Encoder vs Decoder
    The encoder stack processes input tokens bidirectionally (e.g. BERT).
    The decoder stack uses causal (masked) attention over previous outputs plus
    cross-attention over encoder outputs (e.g. GPT, T5).
    ## Scaling Laws
    Kaplan et al. (2020) showed that model loss decreases predictably as a power
    law with compute, data, and parameter count. This motivated GPT-3 (175B) and
    subsequent large language models.
    ## Limitations
    - Quadratic complexity in sequence length: O(n^2)
    - No inherent recurrence -> long-context challenges
    - High memory footprint during training
    ## References
    Vaswani et al. (2017). Attention Is All You Need. NeurIPS.
    Kaplan et al. (2020). Scaling Laws for Neural Language Models. arXiv:2001.08361.
""")

_RAG_DOC = textwrap.dedent("""\
    # Retrieval-Augmented Generation (RAG)
    ## Definition
    RAG augments a generative LLM with a retrieval step: given a query, relevant
    documents are fetched from a corpus and prepended to the prompt, giving the
    model grounded context beyond its training data.
    ## Architecture
    1. **Indexing Phase** — Documents are chunked, embedded via a bi-encoder
    (e.g. text-embedding-3-large), and stored in a vector database (e.g.
    Faiss, Pinecone, Weaviate).
    2. **Retrieval Phase** — The user query is embedded; approximate nearest-
    neighbour (ANN) search returns the top-k chunks.
    3. **Generation Phase** — Retrieved chunks + query are passed to the LLM
    which synthesises a final answer.
    ## Variants
    - **Dense Retrieval**: DPR, Contriever — queries and docs in the same space.
    - **Sparse Retrieval**: BM25 — term frequency-based, no embeddings needed.
    - **Hybrid Retrieval**: Reciprocal Rank Fusion (RRF) combines dense + sparse.
    - **Re-ranking**: A cross-encoder re-scores the top-k before the LLM sees them.
    ## Challenges
    - Context window limits: long retrieved passages may not fit.
    - Retrieval quality is a hard ceiling on generation quality.
    - Chunking strategy significantly affects recall.
    - Multi-hop questions require iterative retrieval (IRCoT, ReAct).
    ## References
    Lewis et al. (2020). RAG for Knowledge-Intensive NLP Tasks. NeurIPS.
    Gao et al. (2023). RAG for Large Language Models. arXiv:2312.10997.
""")

_KNOWLEDGE_GRAPH_DOC = textwrap.dedent("""\
    # Knowledge Graphs and LLM Integration
    ## What is a Knowledge Graph?
    A knowledge graph (KG) is a directed labelled graph of entities (nodes) and
    relations (edges): (subject, predicate, object) triples, e.g.
    (Vaswani, authored, "Attention Is All You Need").
    ## Why Combine KGs with LLMs?
    LLMs hallucinate facts; KGs provide structured, verifiable ground truth.
    KGs are hard to query in natural language; LLMs provide the interface.
    Together they enable faithful, grounded, explainable question answering.
    ## Integration Strategies
    ### KG-Augmented Generation (KGAG)
    Retrieve triples or sub-graphs instead of text chunks, serialise into text,
    then feed to the LLM prompt.
    ### LLM-Assisted KG Construction
    LLMs extract (subject, relation, object) triples from unstructured text,
    reducing manual curation effort significantly.
    ### GraphRAG (Microsoft Research, 2024)
    GraphRAG clusters document communities, generates community summaries, and
    stores them in a KG. Queries answered by map-reduce over community summaries
    outperform flat-vector RAG on sensemaking tasks.
    ## Challenges
    - KG construction quality depends on extraction LLM accuracy.
    - Graph databases add infrastructure complexity.
    - Ontology design requires domain expertise.
    - KGs go stale without continuous update pipelines.
    ## References
    Edge et al. (2024). From Local to Global: A Graph RAG Approach. arXiv:2404.16130.
    Pan et al. (2023). Unifying LLMs and KGs. IEEE Intelligent Systems.
""")

# Filename -> Markdown body of the three seed documents written into raw/.
DOCS = {
    "transformer_architecture.md": _TRANSFORMER_DOC,
    "rag_systems.md": _RAG_DOC,
    "knowledge_graph_integration.md": _KNOWLEDGE_GRAPH_DOC,
}
# Default questions asked of the knowledge base when the user supplies no
# custom query.
QUERIES: list[str] = [
    "What is the Transformer architecture and what problem did it solve?",
    "How does RAG differ from a traditional knowledge base like OpenKB?",
    "What are the connections between knowledge graphs, RAG, and transformers?",
    "What are the shared limitations across all three AI topics covered?",
]
| # ── core pipeline ───────────────────────────────────────────────────────────── | |
def _head(path, limit=35):
    """Return the first *limit* lines of the UTF-8 text file at *path*."""
    return "\n".join(path.read_text(encoding="utf-8").splitlines()[:limit])


def _newest(paths):
    """Return the most recently modified path in *paths*, or None if empty."""
    return max(paths, key=lambda p: p.stat().st_mtime, default=None)


def run_tutorial(api_key: str, run_query: str):
    """Run the whole OpenKB walkthrough, yielding log text as it progresses.

    Side effects: exports the API key into this process's environment,
    pip-installs ``openkb``, deletes and recreates ``KB_DIR``, and invokes
    the ``openkb`` CLI several times.

    Args:
        api_key: OpenRouter API key. Blank or ``None`` aborts with a message.
        run_query: Optional single query to run in Step 5; when blank the
            default ``QUERIES`` are used.

    Yields:
        Successive chunks of human-readable log output.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    import shlex

    api_key = (api_key or "").strip()
    run_query = (run_query or "").strip()
    if not api_key:
        yield "❌ Please enter your OpenRouter API key before running."
        return

    # ── Set env ──────────────────────────────────────────────────────────────
    # NOTE(review): os.environ is process-global, so on a shared deployment the
    # key outlives this call and is visible to later runs — consider passing an
    # explicit env to the subprocess instead.
    os.environ["OPENROUTER_API_KEY"] = api_key
    os.environ["LLM_API_KEY"] = api_key

    yield "📦 Installing OpenKB…"
    # Install with the interpreter that is running this app so the package
    # lands in the active environment rather than whichever `pip` is on PATH.
    installer = f"{shlex.quote(sys.executable)} -m pip" if sys.executable else "pip"
    _, stderr = run_cmd(f"{installer} install openkb --quiet")
    if stderr and "ERROR" in stderr.upper():
        yield f"❌ pip error:\n{stderr}"
        return
    yield "✅ OpenKB installed.\n"

    # ── Step 1: Init KB ──────────────────────────────────────────────────────
    yield section_header("Step 1 — Initialise Knowledge Base")
    if KB_DIR.exists():
        shutil.rmtree(KB_DIR)
    KB_DIR.mkdir(parents=True)
    config_dir = KB_DIR / ".openkb"
    config_dir.mkdir()
    (config_dir / "config.yaml").write_text(
        f"model: {LLM_MODEL}\nlanguage: en\npageindex_threshold: 20\n",
        encoding="utf-8",
    )
    env_file = KB_DIR / ".env"
    # Create the secret-bearing file owner-only *before* writing the key.
    env_file.touch(mode=0o600)
    env_file.write_text(
        f"OPENROUTER_API_KEY={api_key}\nLLM_API_KEY={api_key}\n",
        encoding="utf-8",
    )
    for sub in ["sources", "summaries", "concepts", "explorations", "reports"]:
        (wiki_dir / sub).mkdir(parents=True)
    (wiki_dir / "AGENTS.md").write_text(
        textwrap.dedent("""\
            # Wiki Schema
            ## Conventions
            - All pages use Markdown with [[wikilinks]] for cross-references.
            - `summaries/` -- one page per source document.
            - `concepts/` -- cross-document synthesis pages.
            - `index.md` -- knowledge base overview.
            - `log.md` -- operations timeline.
            """),
        encoding="utf-8",
    )
    (wiki_dir / "index.md").write_text(
        "# Knowledge Base Index\n\nNo documents indexed yet.\n", encoding="utf-8"
    )
    (wiki_dir / "log.md").write_text("# Operations Log\n\n", encoding="utf-8")
    raw_dir.mkdir()
    for fname, content in DOCS.items():
        (raw_dir / fname).write_text(content, encoding="utf-8")
    yield f"✅ KB initialised at: {KB_DIR}"
    yield f" Model : {LLM_MODEL}"
    yield f" Docs : {list(DOCS.keys())}\n"

    # ── Step 2: Compile ──────────────────────────────────────────────────────
    yield section_header("Step 2 — Compile Documents into the Wiki")
    yield "Each document is read by the LLM, which writes summaries + concept pages.\n"
    for fname in DOCS:
        doc_path = raw_dir / fname
        yield f" ➕ Adding: {fname}"
        out = kb_cmd(f"add {shlex.quote(str(doc_path))}")
        yield textwrap.indent(out[:600], " ")
        time.sleep(1)
    yield "✅ All documents compiled.\n"

    # ── Step 3: Explore ──────────────────────────────────────────────────────
    yield section_header("Step 3 — Explore the Generated Wiki")
    yield "📂 Directory tree (wiki/):\n"
    yield "\n".join(show_tree(wiki_dir, max_depth=3))
    yield "\n📄 wiki/index.md:\n" + "─" * 50
    yield _head(wiki_dir / "index.md")
    yield "\n📄 wiki/log.md:\n" + "─" * 50
    yield _head(wiki_dir / "log.md")
    concepts = sorted((wiki_dir / "concepts").glob("*.md"))
    yield f"\n💡 Generated concept pages ({len(concepts)}):"
    for cp in concepts:
        yield f" • {cp.name}"
    if concepts:
        yield f"\n📄 Sample concept — {concepts[0].name}:\n" + "─" * 50
        yield _head(concepts[0])

    # ── Step 4: List & Status ────────────────────────────────────────────────
    yield section_header("Step 4 — List Indexed Content & Status")
    yield "── openkb list ──\n" + kb_cmd("list")
    yield "\n── openkb status ──\n" + kb_cmd("status")

    # ── Step 5: Queries ──────────────────────────────────────────────────────
    yield section_header("Step 5 — Query the Knowledge Base")
    query_list = [run_query] if run_query else QUERIES
    for i, query in enumerate(query_list, 1):
        yield f"\n❓ Query {i}: {query}\n" + "─" * 60
        # The query may come straight from the UI: quote it so shell
        # metacharacters in user text cannot break out of the argument.
        yield kb_cmd(f"query {shlex.quote(query)}")

    # ── Step 6: Deep synthesis ───────────────────────────────────────────────
    yield section_header("Step 6 — Save a Deep Synthesis Query")
    deep_query = (
        "Synthesise the key architectural themes across transformers, RAG, and "
        "knowledge graphs into a unified mental model."
    )
    yield f"❓ Query: {deep_query}\n"
    out = kb_cmd(f"query {shlex.quote(deep_query)} --save")
    yield out[:800]
    # glob() order is unspecified, so pick the latest file by mtime.
    saved = _newest((wiki_dir / "explorations").glob("*.md"))
    if saved is not None:
        yield f"\n📄 Saved → {saved.name}\n" + "─" * 50
        yield _head(saved)

    # ── Step 7: Lint ─────────────────────────────────────────────────────────
    yield section_header("Step 7 — Lint: Wiki Health Checks")
    yield kb_cmd("lint")
    report = _newest((wiki_dir / "reports").glob("*.md"))
    if report is not None:
        yield f"\n📄 Report — {report.name}:\n" + "─" * 50
        yield _head(report)

    # ── Step 8: Programmatic analysis ────────────────────────────────────────
    yield section_header("Step 8 — Programmatic Wiki Analysis")
    wiki_pages = {}
    for md_file in wiki_dir.rglob("*.md"):
        rel = str(md_file.relative_to(wiki_dir))
        content = md_file.read_text(encoding="utf-8")
        links = re.findall(r'\[\[([^\]]+)\]\]', content)
        wiki_pages[rel] = {"lines": len(content.splitlines()), "wikilinks": links}
    yield f"Total wiki pages : {len(wiki_pages)}\n"
    header = f"{'Page':<45} {'Lines':>6} {'Links':>5}\n" + "─" * 60
    yield header
    for page, m in sorted(wiki_pages.items()):
        yield f" {page:<43} {m['lines']:>6} {len(m['wikilinks']):>5}"
    link_targets = Counter(
        link for m in wiki_pages.values() for link in m["wikilinks"]
    )
    if link_targets:
        yield "\n🏆 Most-referenced wiki pages (hub concepts):"
        for page, count in link_targets.most_common(8):
            yield f" {count:>3}x [[{page}]]"

    # ── Step 9: Incremental update ───────────────────────────────────────────
    yield section_header("Step 9 — Incremental Update: Add a 4th Document")
    new_doc = raw_dir / "sparse_attention.md"
    new_doc.write_text(
        textwrap.dedent("""\
            # Sparse Attention Mechanisms
            ## Motivation
            Standard transformer attention is O(n^2) in sequence length, limiting context
            windows. Sparse attention patterns reduce this to O(n log n) or O(n*sqrt(n)).
            ## Key Approaches
            - **Longformer** (Beltagy et al., 2020): local sliding-window + global tokens.
            - **BigBird** (Zaheer et al., 2020): random + window + global; Turing-complete.
            - **Flash Attention** (Dao et al., 2022): exact attention, hardware-aware CUDA
            tiling. Not sparse but dramatically faster in practice.
            ## Impact on RAG
            Larger context windows reduce the need for chunking and retrieval. However,
            retrieval still helps for corpora larger than any single context window.
            ## References
            Beltagy et al. (2020). Longformer. arXiv:2004.05150.
            Zaheer et al. (2020). Big Bird. NeurIPS.
            Dao et al. (2022). FlashAttention. NeurIPS.
            """),
        encoding="utf-8",
    )
    concepts_before = len(list((wiki_dir / "concepts").glob("*.md")))
    yield f"Adding: {new_doc.name}"
    yield kb_cmd(f"add {shlex.quote(str(new_doc))}")[:500]
    concepts_after = list((wiki_dir / "concepts").glob("*.md"))
    yield f"\n💡 Concept pages: {concepts_before} -> {len(concepts_after)}"
    for c in sorted(concepts_after, key=lambda p: p.stat().st_mtime, reverse=True)[:3]:
        yield f" • {c.name}"

    # ── Done ─────────────────────────────────────────────────────────────────
    yield section_header("Tutorial Complete 🎉")
    yield textwrap.dedent(f"""
        What we covered
        ───────────────
        1. Installed OpenKB
        2. Entered API key securely (never stored in code)
        3. Used FREE model: meta-llama/llama-3.3-70b-instruct via OpenRouter
        4. Initialised KB at {KB_DIR}
        5. Created 3 AI research docs and compiled them into a wiki
        6. Explored auto-generated summaries, concept pages, and index
        7. Listed content (openkb list) and checked stats (openkb status)
        8. Ran queries of increasing complexity
        9. Saved a deep synthesis query to wiki/explorations/
        10. Linted the wiki for health issues
        11. Analysed the wiki graph programmatically (hub pages, cross-refs)
        12. Added a 4th document — demonstrated incremental live updates
        Other free OpenRouter models to try:
        ─────────────────────────────────────
        openrouter/mistralai/mistral-7b-instruct:free
        openrouter/google/gemma-3-27b-it:free
        openrouter/qwen/qwen3-14b:free
        openrouter/microsoft/phi-4-reasoning:free
        Docs: https://github.com/VectifyAI/OpenKB
        """)
def stream_tutorial(api_key: str, custom_query: str):
    """Adapt run_tutorial for a streaming Gradio textbox.

    Yields the full transcript so far after every new chunk (each chunk is
    followed by a newline).
    """
    transcript = ""
    for piece in run_tutorial(api_key, custom_query):
        transcript = f"{transcript}{piece}\n"
        yield transcript
| # ── Gradio UI ───────────────────────────────────────────────────────────────── | |
CSS = """
#output-box textarea { font-family: 'Courier New', monospace; font-size: 13px; }
"""

# Markdown shown above the controls.
_INTRO_MD = """
# 📚 OpenKB + OpenRouter (Llama 3.3-70B) — Knowledge Base Tutorial
**by [Marktechpost](https://marktechpost.com)**
Build an AI-powered wiki from research documents using [OpenKB](https://github.com/VectifyAI/OpenKB)
and Meta's **Llama 3.3-70B** via [OpenRouter](https://openrouter.ai) — **100% free, no credit card needed**.
> 🔑 Get your free API key at [openrouter.ai/keys](https://openrouter.ai/keys)
"""

# Markdown shown below the controls: a table of the tutorial steps.
_STEPS_MD = """
---
### Steps covered
| # | Step |
|---|------|
| 1 | Initialise knowledge base |
| 2 | Compile 3 AI research docs into the wiki via LLM |
| 3 | Explore auto-generated summaries & concept pages |
| 4 | `openkb list` + `openkb status` |
| 5 | Run natural-language queries |
| 6 | Save a deep synthesis query |
| 7 | Lint the wiki for health issues |
| 8 | Programmatic cross-reference graph analysis |
| 9 | Incremental update with a 4th document |
"""


def _clear_output():
    """Return the empty string used to reset the output textbox."""
    return ""


# NOTE(review): the source's indentation was lost, so the nesting below is a
# reconstruction — the output textbox is assumed to sit in the row next to the
# controls column. Confirm against the deployed layout.
with gr.Blocks(title="OpenKB + Llama Tutorial — Marktechpost", css=CSS) as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(
                label="OpenRouter API Key",
                placeholder="sk-or-...",
                type="password",
                info="Your key is used only for this session and never stored.",
            )
            custom_query = gr.Textbox(
                label="Custom Query (optional)",
                placeholder="Leave blank to run all 4 default queries",
                lines=2,
            )
            run_btn = gr.Button("▶ Run Full Tutorial", variant="primary")
            clear_btn = gr.Button("🗑 Clear Output", variant="secondary")
        output_box = gr.Textbox(
            label="Output",
            lines=40,
            max_lines=80,
            interactive=False,
            elem_id="output-box",
        )

    gr.Markdown(_STEPS_MD)

    # Stream the tutorial log into the output box; the second button empties it.
    run_btn.click(
        fn=stream_tutorial,
        inputs=[api_key_input, custom_query],
        outputs=output_box,
        show_progress=True,
    )
    clear_btn.click(fn=_clear_output, outputs=output_box)
if __name__ == "__main__":
    # The run handler is a generator (streams partial output). Gradio 3.x
    # refuses generator handlers unless the queue is enabled; on later
    # versions the queue is already on and this call is harmless.
    demo.queue().launch()