Spaces:
Sleeping
Sleeping
| <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head> | |
| <meta charset="utf-8"> | |
| <meta name="generator" content="quarto-1.3.340"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"> | |
| <title>vocabulary</title> | |
| <style> | |
| code{white-space: pre-wrap;} | |
| span.smallcaps{font-variant: small-caps;} | |
| div.columns{display: flex; gap: min(4vw, 1.5em);} | |
| div.column{flex: auto; overflow-x: auto;} | |
| div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} | |
| ul.task-list{list-style: none;} | |
| ul.task-list li input[type="checkbox"] { | |
| width: 0.8em; | |
| margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ | |
| vertical-align: middle; | |
| } | |
| </style> | |
| <script src="Vocabulary_files/libs/clipboard/clipboard.min.js"></script> | |
| <script src="Vocabulary_files/libs/quarto-html/quarto.js"></script> | |
| <script src="Vocabulary_files/libs/quarto-html/popper.min.js"></script> | |
| <script src="Vocabulary_files/libs/quarto-html/tippy.umd.min.js"></script> | |
| <script src="Vocabulary_files/libs/quarto-html/anchor.min.js"></script> | |
| <link href="Vocabulary_files/libs/quarto-html/tippy.css" rel="stylesheet"> | |
| <link href="Vocabulary_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles"> | |
| <script src="Vocabulary_files/libs/bootstrap/bootstrap.min.js"></script> | |
| <link href="Vocabulary_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet"> | |
| <link href="Vocabulary_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light"> | |
| </head> | |
| <body class="fullcontent"> | |
| <div id="quarto-content" class="page-columns page-rows-contents page-layout-article"> | |
| <main class="content" id="quarto-document-content"> | |
| <section id="vocabularylingo-challenge" class="level1"> | |
| <h1>Vocabulary/Lingo Challenge</h1> | |
| <section id="link-to-my-repository-and-screenshot-of-my-app" class="level2"> | |
| <h2 class="anchored" data-anchor-id="link-to-my-repository-and-screenshot-of-my-app">Link to My Repository and Screenshot of My App</h2> | |
| <p><strong>Repository Link:</strong> <a href="https://huggingface.co/spaces/ds460/luncefordn_weather_app">Weather Dashboard</a></p> | |
| <p><strong>Screenshot:</strong></p> | |
| <div class="quarto-figure quarto-figure-center"> | |
| <figure class="figure"> | |
| <p><img src="screenshot.png" class="img-fluid figure-img" alt="Screenshot of the Weather Dashboard Streamlit app"></p> | |
| <figcaption class="figure-caption">Streamlit App Screenshot</figcaption> | |
| </figure> | |
| </div> | |
| <section id="added-value-of-using-databricks-in-your-data-science-process" class="level3"> | |
| <h3 class="anchored" data-anchor-id="added-value-of-using-databricks-in-your-data-science-process">Added Value of Using Databricks in Your Data Science Process</h3> | |
| <p>Databricks offers several advantages in the data science workflow, including:</p> | |
| <ul> | |
| <li><strong>Scalability:</strong> Databricks can handle large-scale data processing using Apache Spark, enabling efficient data handling and analytics.</li> | |
| <li><strong>Collaboration:</strong> It provides a collaborative environment where data scientists and engineers can work together in shared notebooks.</li> | |
| <li><strong>Integration:</strong> Seamlessly integrates with various data sources and tools like AWS, Azure, Delta Lake, and more.</li> | |
| <li><strong>Performance:</strong> Optimized runtime ensures faster data processing and querying.</li> | |
| <li><strong>Machine Learning:</strong> Built-in support for machine learning workflows, including model training, deployment, and monitoring.</li> | |
| <li><strong>Parallelization and Clusters:</strong> Databricks leverages Spark’s parallel processing capabilities to distribute tasks across multiple nodes in a cluster, enhancing performance and scalability.</li> | |
| </ul> | |
| <div class="quarto-figure quarto-figure-center"> | |
| <figure class="figure"> | |
| <p><img src="databricks_diagram.png" class="img-fluid figure-img" alt="Databricks diagram"></p> | |
| <figcaption class="figure-caption">Databricks Diagram</figcaption> | |
| </figure> | |
| </div> | |
| </section> | |
| <section id="comparing-pyspark-to-pandas" class="level3"> | |
| <h3 class="anchored" data-anchor-id="comparing-pyspark-to-pandas">Comparing PySpark to Pandas</h3> | |
| <table class="table"> | |
| <colgroup> | |
| <col style="width: 17%"> | |
| <col style="width: 42%"> | |
| <col style="width: 40%"> | |
| </colgroup> | |
| <thead> | |
| <tr class="header"> | |
| <th>Feature</th> | |
| <th>PySpark</th> | |
| <th>Pandas</th> | |
| </tr> | |
| </thead> | |
| <tbody> | |
| <tr class="odd"> | |
| <td><strong>Scalability</strong></td> | |
| <td>Distributed computing for large data</td> | |
| <td>Limited to single machine memory</td> | |
| </tr> | |
| <tr class="even"> | |
| <td><strong>Performance</strong></td> | |
| <td>Efficient with large datasets</td> | |
| <td>Slower with very large datasets</td> | |
| </tr> | |
| <tr class="odd"> | |
| <td><strong>APIs</strong></td> | |
| <td>Similar to SQL, complex functions</td> | |
| <td>Intuitive, Pythonic</td> | |
| </tr> | |
| <tr class="even"> | |
| <td><strong>Ecosystem</strong></td> | |
| <td>Part of Apache Spark</td> | |
| <td>Part of Python’s scientific stack</td> | |
| </tr> | |
| <tr class="odd"> | |
| <td><strong>Use Case</strong></td> | |
| <td>Big data, distributed processing</td> | |
| <td>Small to medium data, in-memory</td> | |
| </tr> | |
| <tr class="even"> | |
| <td><strong>Integration</strong></td> | |
| <td>Integrates with Hadoop, Spark SQL</td> | |
| <td>Integrates with NumPy, SciPy, etc.</td> | |
| </tr> | |
| </tbody> | |
| </table> | |
| </section> | |
| <section id="explaining-docker-to-a-non-tech-person" class="level3"> | |
| <h3 class="anchored" data-anchor-id="explaining-docker-to-a-non-tech-person">Explaining Docker to a Non-Tech Person</h3> | |
| <p>Think of Docker as a software version of a shipping container. Similar to how shipping containers facilitate the transportation of goods regardless of the mode of transport (e.g., by ship, train, or truck), Docker enables software to operate consistently across various computing environments.</p> | |
| <p>Docker serves as a tool that helps developers wrap their applications and associated dependencies into a “container.” This container can be deployed on any computer system, guaranteeing the performance of the application irrespective of the hosting environment—like how a shipping container maintains the integrity of its contents regardless of the means of transportation used.</p> | |
| <p>In Docker terminology, an <strong>image</strong> is like a blueprint for building a container. It includes everything needed to run an application—code, runtime, libraries, and settings. A <strong>container</strong> is a running instance of an image, similar to how a shipped container holds and transports goods based on its blueprint.</p> | |
| <p><strong>Key Points:</strong></p> | |
| <ul> | |
| <li><strong>Consistency:</strong> Docker containers ensure that the software runs consistently on any machine, just like shipping containers keep goods safe and consistent during transport.</li> | |
| <li><strong>Isolation:</strong> Each application runs in its own container, avoiding conflicts with other applications, similar to how shipping containers keep goods separated and secure from each other.</li> | |
| <li><strong>Efficiency:</strong> Containers are lightweight and share the host system’s resources efficiently, much like how shipping containers are designed to be stackable and space-efficient.</li> | |
| <li><strong>Portability:</strong> Containers can be easily moved from one environment to another (e.g., from a developer’s laptop to a cloud server), just like shipping containers can be transferred from a ship to a train to a truck seamlessly.</li> | |
| </ul> | |
| <p><strong>Diagram:</strong></p> | |
| <div class="quarto-figure quarto-figure-center"> | |
| <figure class="figure"> | |
| <p><img src="https://miro.medium.com/max/331/1*rCo_Q1-f7V5zYoKAQed_Mw.png" class="img-fluid figure-img" alt="Docker diagram"></p> | |
| <figcaption class="figure-caption">Docker Diagram</figcaption> | |
| </figure> | |
| </div> | |
| </section> | |
| <section id="sources" class="level3"> | |
| <h3 class="anchored" data-anchor-id="sources">Sources</h3> | |
| <ul> | |
| <li><a href="https://docs.databricks.com/en/introduction/index.html">Databricks Documentation</a></li> | |
| <li><a href="https://docs.docker.com/guides/docker-overview/">Docker Overview</a></li> | |
| <li><a href="https://medium.com/geekculture/pandas-vs-pyspark-fe110c266e5c">Pandas vs. PySpark on Medium</a></li> | |
| </ul> | |
| </section> | |
| </section> | |
| </section> | |
| </main> | |
| <!-- /main column --> | |
| <script id="quarto-html-after-body" type="application/javascript"> | |
| window.document.addEventListener("DOMContentLoaded", function (event) { | |
| const toggleBodyColorMode = (bsSheetEl) => { | |
| const mode = bsSheetEl.getAttribute("data-mode"); | |
| const bodyEl = window.document.querySelector("body"); | |
| if (mode === "dark") { | |
| bodyEl.classList.add("quarto-dark"); | |
| bodyEl.classList.remove("quarto-light"); | |
| } else { | |
| bodyEl.classList.add("quarto-light"); | |
| bodyEl.classList.remove("quarto-dark"); | |
| } | |
| } | |
| const toggleBodyColorPrimary = () => { | |
| const bsSheetEl = window.document.querySelector("link#quarto-bootstrap"); | |
| if (bsSheetEl) { | |
| toggleBodyColorMode(bsSheetEl); | |
| } | |
| } | |
| toggleBodyColorPrimary(); | |
| const icon = ""; | |
| const anchorJS = new window.AnchorJS(); | |
| anchorJS.options = { | |
| placement: 'right', | |
| icon: icon | |
| }; | |
| anchorJS.add('.anchored'); | |
| const isCodeAnnotation = (el) => { | |
| for (const clz of el.classList) { | |
| if (clz.startsWith('code-annotation-')) { | |
| return true; | |
| } | |
| } | |
| return false; | |
| } | |
| const clipboard = new window.ClipboardJS('.code-copy-button', { | |
| text: function(trigger) { | |
| const codeEl = trigger.previousElementSibling.cloneNode(true); | |
| for (const childEl of codeEl.children) { | |
| if (isCodeAnnotation(childEl)) { | |
| childEl.remove(); | |
| } | |
| } | |
| return codeEl.innerText; | |
| } | |
| }); | |
| clipboard.on('success', function(e) { | |
| // button target | |
| const button = e.trigger; | |
| // don't keep focus | |
| button.blur(); | |
| // flash "checked" | |
| button.classList.add('code-copy-button-checked'); | |
| var currentTitle = button.getAttribute("title"); | |
| button.setAttribute("title", "Copied!"); | |
| let tooltip; | |
| if (window.bootstrap) { | |
| button.setAttribute("data-bs-toggle", "tooltip"); | |
| button.setAttribute("data-bs-placement", "left"); | |
| button.setAttribute("data-bs-title", "Copied!"); | |
| tooltip = new bootstrap.Tooltip(button, | |
| { trigger: "manual", | |
| customClass: "code-copy-button-tooltip", | |
| offset: [0, -8]}); | |
| tooltip.show(); | |
| } | |
| setTimeout(function() { | |
| if (tooltip) { | |
| tooltip.hide(); | |
| button.removeAttribute("data-bs-title"); | |
| button.removeAttribute("data-bs-toggle"); | |
| button.removeAttribute("data-bs-placement"); | |
| } | |
| button.setAttribute("title", currentTitle); | |
| button.classList.remove('code-copy-button-checked'); | |
| }, 1000); | |
| // clear code selection | |
| e.clearSelection(); | |
| }); | |
| function tippyHover(el, contentFn) { | |
| const config = { | |
| allowHTML: true, | |
| content: contentFn, | |
| maxWidth: 500, | |
| delay: 100, | |
| arrow: false, | |
| appendTo: function(el) { | |
| return el.parentElement; | |
| }, | |
| interactive: true, | |
| interactiveBorder: 10, | |
| theme: 'quarto', | |
| placement: 'bottom-start' | |
| }; | |
| window.tippy(el, config); | |
| } | |
| const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]'); | |
| for (var i=0; i<noterefs.length; i++) { | |
| const ref = noterefs[i]; | |
| tippyHover(ref, function() { | |
| // use id or data attribute instead here | |
| let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href'); | |
| try { href = new URL(href).hash; } catch {} | |
| const id = href.replace(/^#\/?/, ""); | |
| const note = window.document.getElementById(id); | |
| return note.innerHTML; | |
| }); | |
| } | |
| let selectedAnnoteEl; | |
| const selectorForAnnotation = ( cell, annotation) => { | |
| let cellAttr = 'data-code-cell="' + cell + '"'; | |
| let lineAttr = 'data-code-annotation="' + annotation + '"'; | |
| const selector = 'span[' + cellAttr + '][' + lineAttr + ']'; | |
| return selector; | |
| } | |
| // Highlight the code lines an annotation refers to. | |
| // Reads data-target-cell / data-target-annotation from `annoteEl`, looks up the matching | |
| // annotation <span>, and positions two absolutely-positioned overlay divs (one next to the | |
| // code, one in the cell's gutter) so they span from the first to the last listed line. | |
| // Finally records `annoteEl` in `selectedAnnoteEl`. | |
| // NOTE(review): none of the element lookups below are null-checked, so a missing span, | |
| // line element, code cell or gutter makes this throw a TypeError. | |
| const selectCodeLines = (annoteEl) => { | |
| // NOTE(review): `doc` is never used in this function. | |
| const doc = window.document; | |
| const targetCell = annoteEl.getAttribute("data-target-cell"); | |
| const targetAnnotation = annoteEl.getAttribute("data-target-annotation"); | |
| const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation)); | |
| // data-code-lines is split on commas; each piece is turned into an element id of the | |
| // form "<targetCell>-<line>". | |
| const lines = annoteSpan.getAttribute("data-code-lines").split(","); | |
| const lineIds = lines.map((line) => { | |
| return targetCell + "-" + line; | |
| }) | |
| let top = null; | |
| let height = null; | |
| let parent = null; | |
| if (lineIds.length > 0) { | |
| //compute the position of the single el (top and bottom and make a div) | |
| const el = window.document.getElementById(lineIds[0]); | |
| top = el.offsetTop; | |
| height = el.offsetHeight; | |
| // The overlay is appended to the grandparent of the first line element. | |
| parent = el.parentElement.parentElement; | |
| if (lineIds.length > 1) { | |
| // Several lines: stretch the height from the top of the first line to the bottom of the last. | |
| const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]); | |
| const bottom = lastEl.offsetTop + lastEl.offsetHeight; | |
| height = bottom - top; | |
| } | |
| if (top !== null && height !== null && parent !== null) { | |
| // cook up a div (if necessary) and position it | |
| let div = window.document.getElementById("code-annotation-line-highlight"); | |
| if (div === null) { | |
| div = window.document.createElement("div"); | |
| div.setAttribute("id", "code-annotation-line-highlight"); | |
| div.style.position = 'absolute'; | |
| parent.appendChild(div); | |
| } | |
| // Start 2px above the first line and extend 2px below the last one. | |
| div.style.top = top - 2 + "px"; | |
| div.style.height = height + 4 + "px"; | |
| // Same treatment for the gutter overlay, which lives inside the target cell's | |
| // .code-annotation-gutter element. | |
| let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter"); | |
| if (gutterDiv === null) { | |
| gutterDiv = window.document.createElement("div"); | |
| gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter"); | |
| gutterDiv.style.position = 'absolute'; | |
| const codeCell = window.document.getElementById(targetCell); | |
| const gutter = codeCell.querySelector('.code-annotation-gutter'); | |
| gutter.appendChild(gutterDiv); | |
| } | |
| gutterDiv.style.top = top - 2 + "px"; | |
| gutterDiv.style.height = height + 4 + "px"; | |
| } | |
| // Remember which annotation is selected. | |
| selectedAnnoteEl = annoteEl; | |
| } | |
| }; | |
| const unselectCodeLines = () => { | |
| const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]; | |
| elementsIds.forEach((elId) => { | |
| const div = window.document.getElementById(elId); | |
| if (div) { | |
| div.remove(); | |
| } | |
| }); | |
| selectedAnnoteEl = undefined; | |
| }; | |
| // Attach click handler to the DT | |
| const annoteDls = window.document.querySelectorAll('dt[data-target-cell]'); | |
| for (const annoteDlNode of annoteDls) { | |
| annoteDlNode.addEventListener('click', (event) => { | |
| const clickedEl = event.target; | |
| if (clickedEl !== selectedAnnoteEl) { | |
| unselectCodeLines(); | |
| const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active'); | |
| if (activeEl) { | |
| activeEl.classList.remove('code-annotation-active'); | |
| } | |
| selectCodeLines(clickedEl); | |
| clickedEl.classList.add('code-annotation-active'); | |
| } else { | |
| // Unselect the line | |
| unselectCodeLines(); | |
| clickedEl.classList.remove('code-annotation-active'); | |
| } | |
| }); | |
| } | |
| const findCites = (el) => { | |
| const parentEl = el.parentElement; | |
| if (parentEl) { | |
| const cites = parentEl.dataset.cites; | |
| if (cites) { | |
| return { | |
| el, | |
| cites: cites.split(' ') | |
| }; | |
| } else { | |
| return findCites(el.parentElement) | |
| } | |
| } else { | |
| return undefined; | |
| } | |
| }; | |
| var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]'); | |
| for (var i=0; i<bibliorefs.length; i++) { | |
| const ref = bibliorefs[i]; | |
| const citeInfo = findCites(ref); | |
| if (citeInfo) { | |
| tippyHover(citeInfo.el, function() { | |
| var popup = window.document.createElement('div'); | |
| citeInfo.cites.forEach(function(cite) { | |
| var citeDiv = window.document.createElement('div'); | |
| citeDiv.classList.add('hanging-indent'); | |
| citeDiv.classList.add('csl-entry'); | |
| var biblioDiv = window.document.getElementById('ref-' + cite); | |
| if (biblioDiv) { | |
| citeDiv.innerHTML = biblioDiv.innerHTML; | |
| } | |
| popup.appendChild(citeDiv); | |
| }); | |
| return popup.innerHTML; | |
| }); | |
| } | |
| } | |
| }); | |
| </script> | |
| </div> <!-- /content --> | |
| </body></html> |