[{"data":1,"prerenderedAt":848},["ShallowReactive",2],{"i-kinnu:logo":3,"i-kinnu:origami-folding":8,"pathway-science-statistics-for-data-science-introductory-level":12,"i-lucide:chevron-right":843,"i-lucide:tag":846},{"left":4,"top":4,"width":5,"height":5,"rotate":4,"vFlip":6,"hFlip":6,"body":7},0,27,false,"\u003Cg fill=\"none\">\u003Cpath d=\"M0.046875 1.05555C0.046875 1.03541 0.048197 1.01579 0.0507438 0.996728C0.0987149 0.438619 0.586845 0 1.18194 0H25.4398C26.451 0 26.9575 1.171 26.2424 1.85585L15.7301 11.9243L1.31574 0.903476C1.17475 0.79568 1.01137 0.761884 0.859586 0.784111L26.2936 25.1441C27.0086 25.829 26.5022 27 25.4909 27H1.18194C0.555061 27 0.046875 26.5133 0.046875 25.9129V1.05555Z\" fill=\"currentColor\"/>\u003C/g>",{"left":4,"top":4,"width":9,"height":10,"rotate":4,"vFlip":6,"hFlip":6,"body":11},1000,236,"\u003Cg fill=\"none\">\u003Cpath fill-rule=\"evenodd\" clip-rule=\"evenodd\"\n    d=\"M193.68 38.2238C195.994 38.2238 197.87 40.0989 197.87 42.412V231.812C197.87 234.125 195.994 236 193.68 236H4.19013C1.87603 236 2.02305e-07 234.125 0 231.812V42.412C-2.02305e-07 40.0989 1.87603 38.2238 4.19013 38.2238H193.68ZM111.76 89.0072C111.685 87.9474 110.572 87.2905 109.608 87.7376L96.8872 93.641C95.7786 94.1554 95.702 95.7016 96.7545 96.3225L101.579 99.167C94.7045 109.365 90.5733 122.892 90.5732 137.642C90.5733 154.323 95.8569 169.439 104.416 179.945C105.301 181.032 106.9 181.196 107.987 180.311C109.075 179.426 109.238 177.828 108.353 176.741C100.621 167.25 95.6522 153.305 95.6521 137.642C95.6522 123.661 99.6138 111.051 105.963 101.754L110.456 104.403C111.508 105.024 112.826 104.21 112.74 102.991L111.76 89.0072ZM9.63194 136.286C9.14864 136.286 8.75684 136.678 8.75684 137.161C8.7569 137.644 9.14868 138.035 9.63194 138.035H17.2161C17.6993 138.035 18.0912 137.644 18.0912 137.161C18.0912 136.678 17.6994 136.286 17.2161 136.286H9.63194ZM22.6813 136.286C22.198 136.286 21.8062 136.678 21.8062 137.161C21.8063 137.644 22.1981 138.035 22.6813 138.035H30.2655C30.7487 
138.035 31.1406 137.644 31.1406 137.161C31.1406 136.678 30.7488 136.286 30.2655 136.286H22.6813ZM35.7464 136.286C35.2631 136.286 34.8713 136.678 34.8713 137.161C34.8713 137.644 35.2631 138.035 35.7464 138.035H44.4973C44.9805 138.035 45.3724 137.644 45.3724 137.161C45.3724 136.678 44.9806 136.286 44.4973 136.286H35.7464ZM49.9977 136.286C49.5144 136.286 49.1226 136.678 49.1226 137.161C49.1226 137.644 49.5144 138.035 49.9977 138.035H57.5819C58.0651 138.035 58.4569 137.644 58.457 137.161C58.457 136.678 58.0651 136.286 57.5819 136.286H49.9977ZM63.0783 136.286C62.595 136.286 62.2032 136.678 62.2032 137.161C62.2033 137.644 62.5951 138.035 63.0783 138.035H70.6625C71.1457 138.035 71.5375 137.644 71.5376 137.161C71.5376 136.678 71.1457 136.286 70.6625 136.286H63.0783ZM76.1277 136.286C75.6444 136.286 75.2526 136.678 75.2526 137.161C75.2527 137.644 75.6445 138.035 76.1277 138.035H83.7119C84.1951 138.035 84.5869 137.644 84.587 137.161C84.587 136.678 84.1951 136.286 83.7119 136.286H76.1277ZM102.266 136.286C101.782 136.286 101.39 136.678 101.39 137.161C101.391 137.644 101.782 138.035 102.266 138.035H109.85C110.333 138.035 110.725 137.644 110.725 137.161C110.725 136.678 110.333 136.286 109.85 136.286H102.266ZM115.338 136.286C114.855 136.286 114.463 136.678 114.463 137.161C114.463 137.644 114.855 138.035 115.338 138.035H122.923C123.406 138.035 123.798 137.644 123.798 137.161C123.798 136.678 123.406 136.286 122.923 136.286H115.338ZM128.403 136.286C127.92 136.286 127.528 136.678 127.528 137.161C127.528 137.644 127.92 138.035 128.403 138.035H135.988C136.471 138.035 136.863 137.644 136.863 137.161C136.863 136.678 136.471 136.286 135.988 136.286H128.403ZM141.468 136.286C140.985 136.286 140.593 136.678 140.593 137.161C140.593 137.644 140.985 138.035 141.468 138.035H149.053C149.536 138.035 149.928 137.644 149.928 137.161C149.928 136.678 149.536 136.286 149.053 136.286H141.468ZM154.541 136.286C154.058 136.286 153.666 136.678 153.666 137.161C153.666 137.644 154.058 138.035 154.541 
138.035H162.125C162.609 138.035 163 137.644 163.001 137.161C163.001 136.678 162.609 136.286 162.125 136.286H154.541ZM167.614 136.286C167.131 136.286 166.739 136.678 166.739 137.161C166.739 137.644 167.131 138.035 167.614 138.035H175.198C175.681 138.035 176.073 137.644 176.073 137.161C176.073 136.678 175.681 136.286 175.198 136.286H167.614ZM180.671 136.286C180.188 136.286 179.796 136.678 179.796 137.161C179.796 137.644 180.188 138.035 180.671 138.035H188.255C188.739 138.035 189.13 137.644 189.131 137.161C189.131 136.678 188.739 136.286 188.255 136.286H180.671Z\"\n    fill=\"currentColor\" />\n  \u003Cpath fill-rule=\"evenodd\" clip-rule=\"evenodd\"\n    d=\"M444.85 38.2277C447.164 38.2277 449.04 40.1028 449.04 42.4159V132.928C449.04 135.241 447.164 137.116 444.85 137.116H255.36C253.046 137.116 251.17 135.241 251.17 132.928V42.4159C251.17 40.1028 253.046 38.2277 255.36 38.2277H444.85ZM361.96 125.388C361.618 125.046 361.064 125.046 360.722 125.388L354.534 131.572C354.192 131.914 354.192 132.468 354.534 132.81C354.876 133.151 355.43 133.151 355.772 132.81L361.96 126.624C362.301 126.283 362.301 125.73 361.96 125.388ZM371.047 116.311C370.705 115.969 370.15 115.969 369.809 116.311L364.446 121.671C364.104 122.012 364.104 122.567 364.446 122.908C364.788 123.249 365.342 123.25 365.684 122.908L371.047 117.548C371.388 117.207 371.388 116.652 371.047 116.311ZM380.124 107.246C379.782 106.904 379.227 106.904 378.885 107.246L373.523 112.606C373.181 112.948 373.181 113.502 373.523 113.844C373.864 114.185 374.419 114.185 374.761 113.844L380.124 108.483C380.465 108.142 380.465 107.587 380.124 107.246ZM385.736 65.8841C385.891 64.6727 384.622 63.7845 383.536 64.3434L371.069 70.7636C370.124 71.2504 369.96 72.5334 370.752 73.2424L381.2 82.5938C382.11 83.4081 383.561 82.8672 383.717 81.6557L384.393 76.3725C391.143 77.1933 398.567 80.7709 404.771 86.9711C411.124 93.3213 414.726 100.952 415.43 107.827C415.573 109.221 416.819 110.236 418.214 110.093C419.609 109.95 420.624 108.703 420.481 
107.309C419.644 99.1317 415.435 90.4514 408.362 83.3817C401.466 76.489 393.038 72.3185 385.038 71.338L385.736 65.8841ZM389.2 98.1733C388.859 97.8319 388.304 97.8318 387.962 98.1733L382.6 103.534C382.258 103.875 382.258 104.429 382.6 104.771C382.941 105.112 383.496 105.112 383.838 104.771L389.2 99.4108C389.542 99.0693 389.542 98.5149 389.2 98.1733ZM398.262 89.1047C397.92 88.7633 397.365 88.7632 397.024 89.1047L391.661 94.4649C391.319 94.8065 391.319 95.3608 391.661 95.7024C392.002 96.0436 392.557 96.0438 392.899 95.7024L398.262 90.3421C398.603 90.0007 398.603 89.4463 398.262 89.1047ZM416.431 70.9616C416.089 70.6202 415.534 70.6201 415.193 70.9616L409.83 76.3218C409.488 76.6634 409.488 77.2177 409.83 77.5592C410.172 77.9005 410.726 77.9007 411.068 77.5592L416.431 72.199C416.772 71.8575 416.772 71.3032 416.431 70.9616ZM425.508 61.891C425.166 61.5496 424.611 61.5495 424.27 61.891L418.907 67.2512C418.565 67.5928 418.565 68.1471 418.907 68.4887C419.249 68.8299 419.803 68.8301 420.145 68.4887L425.508 63.1284C425.849 62.787 425.849 62.2326 425.508 61.891ZM434.569 52.8146C434.227 52.4731 433.673 52.4731 433.331 52.8146L427.968 58.1748C427.626 58.5163 427.627 59.0706 427.968 59.4122C428.31 59.7534 428.864 59.7537 429.206 59.4122L434.569 54.052C434.91 53.7105 434.91 53.1562 434.569 52.8146ZM443.638 43.7479C443.296 43.4065 442.742 43.4064 442.4 43.7479L437.037 49.1081C436.695 49.4496 436.696 50.004 437.037 50.3455C437.379 50.6868 437.933 50.687 438.275 50.3455L443.638 44.9853C443.98 44.6438 443.979 44.0895 443.638 43.7479Z\"\n    fill=\"currentColor\" />\n  \u003Cpath fill-rule=\"evenodd\" clip-rule=\"evenodd\"\n    d=\"M684.066 38.2277C687.798 38.2281 689.667 42.7391 687.027 45.3773L596.473 135.889C595.687 136.675 594.621 137.116 593.51 137.116H506.335C504.021 137.116 502.145 135.241 502.145 132.928V42.4159C502.145 40.1028 504.021 38.2277 506.335 38.2277H684.066ZM514.603 124.566C514.261 124.224 513.707 124.224 513.365 124.566L507.178 130.751C506.836 131.093 506.836 131.646 
507.178 131.988C507.519 132.329 508.073 132.329 508.415 131.988L514.603 125.803C514.945 125.462 514.945 124.908 514.603 124.566ZM523.689 115.491C523.348 115.15 522.794 115.15 522.452 115.491L517.09 120.852C516.748 121.193 516.748 121.747 517.09 122.088C517.431 122.43 517.985 122.43 518.327 122.088L523.689 116.728C524.031 116.386 524.031 115.833 523.689 115.491ZM532.102 65.8295C530.707 65.6872 529.46 66.7017 529.318 68.0957C529.175 69.4896 530.189 70.7355 531.584 70.8787C538.463 71.5825 546.096 75.1826 552.45 81.5329C558.723 87.8037 562.312 95.3226 563.079 102.13L557.738 102.392C556.518 102.452 555.865 103.855 556.607 104.827L565.115 115.969C565.76 116.814 567.051 116.751 567.611 115.847L574.992 103.928C575.635 102.889 574.848 101.555 573.628 101.615L568.161 101.882C568.161 101.878 568.162 101.874 568.161 101.871C567.324 93.6931 563.114 85.0124 556.041 77.9425C548.968 70.873 540.283 66.6668 532.102 65.8295ZM532.766 106.421C532.425 106.079 531.871 106.079 531.529 106.421L526.166 111.781C525.825 112.123 525.825 112.676 526.166 113.018C526.508 113.359 527.062 113.359 527.403 113.018L532.766 107.657C533.108 107.316 533.108 106.762 532.766 106.421ZM541.843 97.3445C541.501 97.003 540.948 97.003 540.606 97.3445L535.243 102.705C534.901 103.046 534.902 103.6 535.243 103.941C535.585 104.283 536.139 104.283 536.48 103.941L541.843 98.5809C542.185 98.2393 542.185 97.686 541.843 97.3445ZM550.92 88.2778C550.578 87.9363 550.025 87.9363 549.683 88.2778L544.32 93.638C543.978 93.9796 543.978 94.5329 544.32 94.8745C544.662 95.2161 545.215 95.2161 545.557 94.8745L550.92 89.5142C551.262 89.1727 551.262 88.6193 550.92 88.2778ZM569.066 70.1405C568.724 69.799 568.17 69.7991 567.829 70.1405L562.466 75.5008C562.124 75.8423 562.124 76.3956 562.466 76.7372C562.808 77.0788 563.361 77.0788 563.703 76.7372L569.066 71.377C569.407 71.0354 569.407 70.4821 569.066 70.1405ZM578.143 61.0699C577.801 60.7284 577.247 60.7285 576.906 61.0699L571.543 66.4302C571.201 66.7717 571.201 67.3251 571.543 
67.6666C571.885 68.0082 572.438 68.0082 572.78 67.6666L578.143 62.3064C578.484 61.9648 578.484 61.4115 578.143 61.0699ZM587.219 51.9896C586.878 51.6481 586.324 51.6481 585.982 51.9896L580.62 57.3498C580.278 57.6914 580.278 58.2447 580.62 58.5863C580.961 58.9279 581.515 58.9279 581.857 58.5863L587.219 53.2261C587.561 52.8845 587.561 52.3312 587.219 51.9896ZM596.288 42.9249C595.947 42.5833 595.392 42.5833 595.05 42.9249L589.689 48.2851C589.347 48.6267 589.347 49.18 589.689 49.5216C590.03 49.863 590.584 49.8631 590.926 49.5216L596.288 44.1613C596.63 43.8198 596.63 43.2664 596.288 42.9249Z\"\n    fill=\"currentColor\" />\n  \u003Cpath fill-rule=\"evenodd\" clip-rule=\"evenodd\"\n    d=\"M850.814 38.2277C854.547 38.2281 856.416 42.739 853.777 45.3773L763.223 135.889C762.437 136.674 761.371 137.116 760.26 137.116H673.176C669.443 137.116 667.574 132.605 670.213 129.966L760.768 39.4544C761.554 38.6692 762.62 38.2277 763.731 38.2277H850.814ZM761.338 121.8C760.855 121.8 760.463 122.191 760.463 122.674V131.13H762.213V122.674C762.213 122.191 761.821 121.8 761.338 121.8ZM761.338 108.971C760.855 108.971 760.463 109.363 760.463 109.846V118.301H762.213V109.846C762.213 109.363 761.821 108.971 761.338 108.971ZM761.338 96.1402C760.855 96.1406 760.463 96.5321 760.463 97.0149V105.47H762.213V97.0149C762.213 96.532 761.821 96.1404 761.338 96.1402ZM782.263 71.887C781.043 71.951 780.395 73.3571 781.139 74.3257L784.474 78.6631C779.115 82.951 771.242 85.7443 762.35 85.7444C753.366 85.7442 745.421 82.8944 740.059 78.5305C738.972 77.6461 737.373 77.8099 736.488 78.8961C735.602 79.983 735.766 81.582 736.853 82.467C743.231 87.6574 752.348 90.8207 762.35 90.8209C772.209 90.8208 781.205 87.746 787.568 82.6884L790.833 86.9341C791.577 87.9025 793.103 87.6391 793.479 86.4767L797.791 73.138C798.118 72.127 797.33 71.1017 796.268 71.1566L782.263 71.887ZM761.338 70.4847C760.855 70.4851 760.463 70.8767 760.463 71.3594V79.8147H762.213V71.3594C762.213 70.8766 761.821 70.485 761.338 70.4847ZM761.338 
57.656C760.855 57.6564 760.463 58.048 760.463 58.5307V66.986H762.213V58.5307C762.213 58.0479 761.821 57.6563 761.338 57.656ZM761.338 44.8293C760.855 44.8297 760.463 45.2212 760.463 45.704V54.1592H762.213V45.704C762.213 45.2211 761.821 44.8295 761.338 44.8293Z\"\n    fill=\"currentColor\" />\n  \u003Cpath\n    d=\"M995.759 38.2277C999.53 38.228 1001.42 42.5171 998.752 45.0253L959.55 81.9005L905.796 41.5363C905.271 41.1418 904.662 41.0182 904.096 41.0994L997.485 130.319C1000.15 132.828 998.262 137.116 994.491 137.116H905.298C902.96 137.116 901.065 135.333 901.065 133.134V42.0941C901.065 42.0204 901.07 41.9483 901.079 41.8786C901.258 39.8345 903.079 38.2277 905.298 38.2277H995.759Z\"\n    fill=\"currentColor\" />\n  \u003Cpath\n    d=\"M505.873 0C506.657 4.57042e-05 507.307 0.195499 507.823 0.587023C508.338 0.969046 508.596 1.53802 508.596 2.29251C508.596 2.76034 508.467 3.19015 508.209 3.58162C507.951 3.96344 507.497 4.26401 506.848 4.48361V4.54114C507.65 4.67487 508.205 4.96191 508.51 5.4012C508.816 5.83087 508.969 6.31772 508.969 6.86193C508.969 7.74056 508.672 8.41851 508.08 8.89604C507.497 9.38304 506.733 9.62731 505.787 9.62738C504.861 9.62738 504.158 9.42172 503.68 9.0111C503.212 8.60054 502.935 8.08005 502.849 7.44993L503.881 7.10571L503.924 7.24028C504.035 7.54934 504.211 7.82925 504.454 8.07986C504.731 8.36635 505.166 8.50986 505.758 8.50989C506.465 8.50989 506.943 8.32772 507.191 7.9648C507.449 7.6019 507.579 7.20078 507.579 6.7615C507.579 6.2173 507.378 5.80683 506.977 5.52992C506.585 5.25295 505.93 5.10026 505.013 5.07161V4.15402C505.901 4.12537 506.489 3.92484 506.776 3.55237C507.062 3.18009 507.206 2.82242 507.206 2.47876C507.206 1.62801 506.752 1.17539 505.845 1.12237L505.658 1.11749C505.467 1.11752 505.242 1.14605 504.985 1.2033C504.736 1.25105 504.511 1.3274 504.31 1.43245L504.081 2.56457L503.05 2.44951L503.322 0.687461C503.666 0.49653 504.068 0.33454 504.526 0.200875C504.985 0.0671945 505.434 0 505.873 0Z\"\n    fill=\"currentColor\" />\n  
\u003Cpath\n    d=\"M905.727 2.30616L904.638 2.4066L904.466 1.26083H901.428V3.72497C901.533 3.71544 901.643 3.71034 901.757 3.71034H902.086C902.755 3.71034 903.386 3.78668 903.979 3.93949C904.58 4.09229 905.068 4.38363 905.44 4.8132C905.822 5.23335 906.014 5.84949 906.014 6.66106C906.014 7.64468 905.722 8.38068 905.14 8.86776C904.557 9.36434 903.783 9.6127 902.818 9.61275C901.91 9.61275 901.213 9.40711 900.725 8.99648C900.248 8.59544 899.96 8.08007 899.865 7.44993L900.911 7.10571C901.007 7.49723 901.203 7.8271 901.499 8.09449C901.795 8.37131 902.211 8.50985 902.746 8.50989C903.395 8.50989 903.869 8.33787 904.165 7.99405C904.461 7.65981 904.609 7.22507 904.609 6.69031C904.609 5.87861 904.337 5.3625 903.792 5.14279C903.248 4.91361 902.612 4.79958 901.886 4.79955C901.695 4.79955 901.489 4.80365 901.27 4.8132C901.059 4.82275 900.854 4.83701 900.653 4.85611L900.224 4.44071V0.143343H905.569L905.727 2.30616Z\"\n    fill=\"currentColor\" />\n  \u003Cpath fill-rule=\"evenodd\" clip-rule=\"evenodd\"\n    d=\"M765.49 6.04576H766.966L766.837 7.14862H765.49V9.48404H764.185V7.14862H759.857L759.713 6.04576L762.909 0.143343H765.49V6.04576ZM760.96 6.04576H764.185V1.26083H763.541L760.96 6.04576Z\"\n    fill=\"currentColor\" />\n  \u003Cpath d=\"M4.80573 6.47481H6.41154V7.60693H1.81068V6.47481H3.50235V1.27546H1.81068V0.143343H4.80573V6.47481Z\"\n    fill=\"currentColor\" />\n  \u003Cpath\n    d=\"M254.359 0C255.353 0 256.055 0.239186 256.466 0.716715C256.877 1.18447 257.083 1.68072 257.083 2.20573C257.083 2.85516 256.849 3.44346 256.38 3.96875C255.912 4.49397 255.348 4.96638 254.689 5.38657C254.039 5.79717 253.437 6.15968 252.883 6.47481H256.423L256.538 5.42948L257.599 5.51529L257.426 7.60693H251.407L251.292 6.58987C252.582 5.73032 253.638 4.98523 254.46 4.35489C255.281 3.71509 255.693 3.05632 255.693 2.37832C255.693 1.53787 255.166 1.11749 254.115 1.12237L254.115 1.11749C253.924 1.11754 253.695 1.14604 253.427 1.2033C253.16 1.25104 252.916 1.32238 252.697 1.41783L252.467 
2.47876L251.45 2.3637L251.707 0.60165C252.118 0.401088 252.563 0.253475 253.041 0.15797C253.519 0.0529708 253.958 1.99446e-05 254.359 0Z\"\n    fill=\"currentColor\" />\u003C/g>",{"id":13,"data":14,"type":15,"maxContentLevel":29,"version":15,"tiles":30},"f7034334-7aad-48e9-8c31-395f96f0f0ae",{"type":15,"title":16,"tagline":17,"description":17,"featureImageSquare":18,"baseColor":19,"emoji":20,"shapePreference":21,"allowContentSuspension":22,"allowContentEdits":22,"editorsChoice":6,"accreditations":23,"certificatePriceLevel":21,"certificationTitle":28},8,"Statistics for Data Science: Introductory Level","A beginner's guide to statistical analysis for data science","bc4cb535-c9d5-4f24-9702-62305d3c649c","#9A8F74","📊",3,true,[24],{"authority":25,"wasCpdTill":26,"previousCpdCreditMinutes":27},1,"2025-12-31T00:00:00Z",120,"Statistics for Data Science (Level 1)",9,[31,190,315,496,672],{"id":32,"data":33,"type":29,"maxContentLevel":21,"version":36,"orbs":37},"b8e485d5-7c86-4a26-abb6-940e35819dfb",{"type":29,"title":34,"tagline":35},"Introductory Knowledge","From statistics to data science – what the difference is and how you can benefit from each",2,[38,75],{"id":39,"data":40,"type":36,"version":36,"maxContentLevel":21,"pages":42},"42612903-960f-4255-b515-88866a90a9ae",{"type":36,"title":41},"The Importance of Statistics",[43,60],{"id":44,"data":45,"type":25,"maxContentLevel":21,"version":36,"reviews":49},"18fedd83-e83c-4e8f-8b33-f239243364a2",{"type":25,"title":46,"contentRole":36,"markdownContent":47,"audioMediaId":48},"Why do we need statistics? ","The field of statistics has helped us answer many of life’s mysteries. 
It saves lives by helping us know how to treat people in hospitals, helps us make money by predicting values in the stock market, enables us to improve living and economic conditions around the world by quantifying important issues and measuring our improvement, and even shows us who might win the next election.\n\n![Graph](image://92df0c29-bd0d-40b5-a9f4-c1f2e4fa9e1c \"The Enigma machine\")\n\nStatistics helped the Allies win WW2 and crack the Enigma code. It's so important we even made a day for it – World Statistics Day takes place on October 20 every five years.\n\nThe saying goes that ‘knowledge is power’. Well, statistics is how we test that knowledge and find out if we are right or wrong, or somewhere in between. Perhaps statistics is the way for us to make our way to being ‘less wrong than we were before’.","dc5ece2b-50ab-460a-85e4-4508451e4cd8",[50],{"id":51,"data":52,"type":53,"version":25,"maxContentLevel":21},"b62bf453-7e31-4d40-92b4-8d4746d4f357",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":54,"binaryCorrect":56,"binaryIncorrect":58},11,[55],"Statistics helped the Allies to crack which code during WW2?",[57],"Enigma",[59],"Da Vinci",{"id":61,"data":62,"type":25,"maxContentLevel":21,"version":36,"reviews":66},"443e7846-0a2e-49d9-8d42-3f4f5762c6f6",{"type":25,"title":63,"contentRole":36,"markdownContent":64,"audioMediaId":65},"What is statistics?","Statistics is how we find patterns and trends in data, and even make predictions about populations we have little information about, based on small samples of observations.\n\nStatistics is a useful tool for summarizing and explaining the data we have, as well as uncovering insights about entire countries based on data gathered by a small number of people.\n\n![Graph](image://1b63148c-9ab0-44db-9822-706a1ca489ad \"Statistics can help identify trends in data\")\n\nIt enables us to conduct reliable and replicable research experiments and is the reason we understand as much about the world as we do 
today. This is because, without it, we’d be very much just guessing our way through things, and hoping for the best.\n\nStatistics can help us answer questions like “Does Kinnu improve final year exam performance?” and “is there a relationship between how much time you spend in Kinnu and how interesting people find you?”\n\n(In case you were wondering, the answer to both questions is yes!)","2f4eedeb-642d-43c2-9d63-b8f0f1a6df7f",[67],{"id":68,"data":69,"type":53,"version":25,"maxContentLevel":21},"a918c75f-8eaa-4c61-899b-057cfeaf31f8",{"type":53,"reviewType":70,"spacingBehaviour":25,"clozeQuestion":71,"clozeWords":73},4,[72],"Statistics is a useful tool for summarizing and explaining data.",[74],"data",{"id":76,"data":77,"type":36,"version":25,"maxContentLevel":21,"pages":79},"30e1f000-2533-47e0-9d8e-f27b725d8dcd",{"type":36,"title":78},"The Role of Data Science",[80,96,112,126,142,158,172],{"id":81,"data":82,"type":25,"maxContentLevel":21,"version":25,"reviews":86},"ee750c9b-7f60-4509-ac35-d489249593f1",{"type":25,"title":83,"contentRole":36,"markdownContent":84,"audioMediaId":85},"What is data science and what can you do with it?","\nData science involves using a combination of statistics, data analytics, and machine learning algorithms to create useful solutions from data. \n\nWith data science, we can teach computers to do things like recognize handwriting and sort mail, or automate the digitization of forms, predict credit card fraud, communicate with humans, and even drive cars. \n\n\n\nData science has become an indispensable part of our modern world. It impacts us in both big and small ways, making our lives more efficient and streamlined. \n\nFor example, on a small scale, data science allows us to have smart spam filters on our email accounts. 
Algorithms can learn to distinguish junk mail from important correspondence, saving us time and frustration.\n\n\n ![Graph](image://15644b71-7b53-4203-9b16-d5d82711ead1 \"Data science can help stop spam\")\n\nBut data science is also making a big impact on the way we tackle critical challenges like cancer detection. Thanks to sophisticated data analysis, doctors are now able to more accurately detect tumors and understand the unique features of each patient's cancer. \n\nBy using machine learning to analyze scans and other data, we can better target treatments, improving outcomes for countless patients. \n\n","74dda011-f59c-4310-acd1-77eb15e50878",[87],{"id":88,"data":89,"type":53,"version":25,"maxContentLevel":21},"e1a126eb-4830-44f8-b715-d981984c5ffa",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":90,"binaryCorrect":92,"binaryIncorrect":94},[91],"What technique helps data scientists with cancer detection?",[93],"Machine learning",[95],"Ultrasound",{"id":97,"data":98,"type":25,"maxContentLevel":21,"version":25,"reviews":102},"15ae3be9-9fed-4c32-a646-be2038becfc6",{"type":25,"title":99,"contentRole":36,"markdownContent":100,"audioMediaId":101},"What is the difference between statistics and data science?","Statisticians find relationships between different things like height and life expectancy, are able to infer something about the whole population based on the data they got from their small sample, or use statistical tests to decide if a new drug is an effective treatment or not. \n\nData scientists on the other hand define sets of rules or calculations, known as algorithms,  so that computers can make predictions and decisions at scale, and learn along the way. \n\n ![Graph](image://4aa56982-91f9-4595-b98d-22015ed0de88 \"Algorithmic modelling can be valuable in medical research\")\n\nThese sets of rules can be thought of as ‘recipes’.  
Data scientists perform statistical analysis too, but the use of machine learning algorithms to make these machine learning “recipes” is their main focus. \n\nIn summary, while statistics are used for designing experiments and testing things, data science uses algorithms to find patterns in data and predict optimal decisions or what might happen in the future. \n\nStatistics are an important underpinning of much of data science, and most data scientists need to understand the foundational knowledge that statistics provides in order to create useful algorithms and models.\n","91796300-e797-4372-8e0e-d873955bd2c3",[103],{"id":104,"data":105,"type":53,"version":25,"maxContentLevel":21},"8c4232d2-55b5-441c-be9a-cdd4e901684a",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":106,"binaryCorrect":108,"binaryIncorrect":110},[107],"What is the main focus of data science?",[109],"Using algorithms to find patterns in data and make predictions",[111],"Making descriptive analysis of data",{"id":113,"data":114,"type":25,"maxContentLevel":21,"version":25,"reviews":118},"54f846bc-246d-43c9-b668-41083bdc6757",{"type":25,"title":115,"contentRole":36,"markdownContent":116,"audioMediaId":117},"What knowledge does Data Science build on? ","\nData science is a combination of data analytics, statistics, and machine learning. \n\nData analytics helps us describe things, and make conclusions about what we can see in front of us from the data we have. Data analytics is for things like measuring and describing past performance – for example business revenues per quarter, or finding an e-commerce store’s top performing categories. It doesn’t make predictions or forecasts. \n\nStatistics is about experiments, testing, and proving. Whereas analytics might show you differences between your revenues, statistics will show you whether that difference is significant, and which factors are likely to have caused it, as well as how much of an effect each individual factor had. 
\n\nMachine Learning helps us find patterns in real-world data, and predict things based on new and limited information. These days, Artificial Intelligence (AI) and Machine Learning are used pretty much interchangeably, even though there are some differences. AI is mostly about mimicking human intelligence and behavior, and Machine Learning is a subfield of AI that can be used to help build AI solutions.\n","ba96b7b0-4342-4667-a8c2-2e64f7604c68",[119],{"id":120,"data":121,"type":53,"version":25,"maxContentLevel":21},"0c029c25-f426-4336-9898-dcddf3fbec55",{"type":53,"reviewType":70,"spacingBehaviour":25,"clozeQuestion":122,"clozeWords":124},[123],"Data science is a combination of data analytics, statistics, and machine learning.",[125],"data analytics",{"id":127,"data":128,"type":25,"maxContentLevel":21,"version":25,"reviews":132},"2e3c2396-8b8b-4058-8229-e1d3dead1496",{"type":25,"title":129,"contentRole":36,"markdownContent":130,"audioMediaId":131},"What is data analytics?","\nData analytics can best be thought of as descriptive. So, you describe the characteristics of data that you already have, rather than taking action to transform data or run models to predict future data. \n\nData analysts often rely on data visualisation to create informative stories about the data, creating narratives that are understandable to non-technical audiences.\n\n ![Graph](image://ca795711-f454-40c2-8b1c-a2367b236b3c \"Many apps exist for basic data analytics for your health\")\n\nAs an example, if you track all sales for an e-commerce store, you can find the average order value, and make a chart to show what percentage of sales each product category accounts for. 
Alternatively, you could create a dashboard for a new health app that tracks users' physical activity and key health metrics.\n\n","6e60925e-71e7-4922-86ae-fe7e58d90988",[133],{"id":134,"data":135,"type":53,"version":25,"maxContentLevel":21},"32d67987-10da-4a7b-abdf-660dcb0b1982",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":136,"binaryCorrect":138,"binaryIncorrect":140},[137],"Which of these describes data analytics?",[139],"Descriptive analysis",[141],"Predictive modelling",{"id":143,"data":144,"type":25,"maxContentLevel":21,"version":25,"reviews":148},"6b42633e-c110-4ca7-a3e3-f7153405916a",{"type":25,"title":145,"contentRole":36,"markdownContent":146,"audioMediaId":147},"What can (and can’t) data analytics do? ","\nA data analyst uses exploratory methods to look for gems in data that can inform business units like leadership or marketing, or even get passed on to the statisticians and machine learning engineers for further exploration and use. They create useful and engaging reports on data that is collected by companies, scientists, governments, and more. \n\n ![Graph](image://31160117-d3b1-4b54-9ea8-aec88ca3d27c \"Data analytics is useful for stock market traders\")\n\nAnalytics gives you the information you need so that you’re not flying blind. It’s useful for decision-makers because it’s your eyes and ears. 
\n\nHowever, data analytics doesn’t allow you to come to conclusions beyond the data, whereas statistics and machine learning do – for example, by predicting the average weight of a population based on a small sample, or generating stock market predictions.\n\n","2c84a1dc-9df4-41a1-b0e3-10fac6f463e9",[149],{"id":150,"data":151,"type":53,"version":25,"maxContentLevel":21},"51c7f823-9522-48da-8579-464797e4aec0",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":152,"binaryCorrect":154,"binaryIncorrect":156},[153],"Which of these is true about data analytics?",[155],"It allows you to create useful reports",[157],"It allows you to create predictive models",{"id":159,"data":160,"type":25,"maxContentLevel":21,"version":25,"reviews":164},"383f4828-a08f-4643-bc19-79c774c0c36e",{"type":25,"title":161,"contentRole":36,"markdownContent":162,"audioMediaId":163},"How does analytics differ from statistics?","\nData analytics gives you a better understanding of your data and helps you form better questions for exploration and verification with statistics. An example might be a marketing campaign performance report where you observe that an increase in sales corresponded with a new social media ad campaign.\n\n ![Graph](image://c88ce951-da11-4a61-9c54-ccdb3c524895 \"Statistics are useful in determining business strategies\")\n\nWith statistics, however, you could test whether the difference in the performance of your marketing campaigns is statistically significant, which adds confidence to business decision-making. 
Statistical significance answers the question of whether the change in the data you are observing could just be due to chance.\n\nWhile analytics helps you form hypotheses, statistics help you test them.\n\n","fe205eec-42d5-4f83-b480-77e641d1213c",[165],{"id":166,"data":167,"type":53,"version":25,"maxContentLevel":21},"52ce84b4-4d80-4c4e-8f71-79c7967e2ecc",{"type":53,"reviewType":70,"spacingBehaviour":25,"clozeQuestion":168,"clozeWords":170},[169],"Analytics helps you form hypotheses, whereas statistics help you test them.",[171],"hypotheses",{"id":173,"data":174,"type":25,"maxContentLevel":21,"version":25,"reviews":178},"cdf070df-874a-4dcd-a0e1-013e4ad7998c",{"type":25,"title":175,"contentRole":36,"markdownContent":176,"audioMediaId":177},"Can you make predictions with data analytics?","\nAnalytics is descriptive, it shows you what is in front of you and describes it. It tells you where you have been, but not if you’ve improved significantly, nor where you are going. This means that you can’t use analytics to make predictions. \n\n ![Graph](image://37809e6a-3561-4f6f-b28a-6229c8925485 \"Screen time apps provide data analytics\")\n\nAnalytics is used to create reports and summaries of past data, like dashboards in apps such as an activity monitor that shows you how many hours a day you have been active. \n\nTo make predictions, you need to use statistics or data science. But analytics can be helpful in finding useful information in your existing data to use for statistical testing, or data science algorithms – so it is still used as a part of the process. 
\n\n","7e16f527-b06d-4cbd-a303-a71fa5d9f33f",[179],{"id":180,"data":181,"type":53,"version":25,"maxContentLevel":21},"ca973697-4cf0-4b8f-afe9-f8d889cb0b83",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":182,"multiChoiceCorrect":184,"multiChoiceIncorrect":186},[183],"What is the purpose of analytics?",[185],"To create reports and summaries of past data",[187,188,189],"To make predictions","To test hypotheses","To use data science algorithms",{"id":191,"data":192,"type":29,"maxContentLevel":21,"version":25,"orbs":195},"354fce4d-f6c1-4289-8e4a-02b3907f76c4",{"type":29,"title":193,"tagline":194},"Samples and Populations","How to choose the subjects of your analysis, and avoid common errors.",[196,247,280],{"id":197,"data":198,"type":36,"version":25,"maxContentLevel":21,"pages":200},"1ddf6431-38b6-4613-9ad1-64f44eda98fc",{"type":36,"title":199},"Understanding Populations and Samples",[201,215,231],{"id":202,"data":203,"type":25,"maxContentLevel":21,"version":25,"reviews":207},"95bc7af2-d6c6-4b9b-87b8-47bed5d3043e",{"type":25,"title":204,"contentRole":36,"markdownContent":205,"audioMediaId":206},"Populations","A population is a group of interest for your research, like everyone in your city or country. It doesn’t necessarily have to be a group of people either, any list of items works. \n\nAs an example, it could be all laptops of a specific model produced by a company in 2021. In this example, the time period depends on what you’re interested in finding out. \n\n ![Graph](image://3786a416-ee2d-4b33-8322-0a8bb31c0f41 \"The population is the total group of units for your study\")\n\nIf you wanted to know the number of defects for just one year, then the time period you used for your sample would be one year. If you wanted to know the number of defects for all time, then your population would be all laptops of that model ever produced. \n\nThese are some of the reasons that it’s important to know your research aims before you select your population. 
\n\n","85e6bd76-ddf9-4d92-afd2-78ce69d20744",[208],{"id":209,"data":210,"type":53,"version":25,"maxContentLevel":21},"48c3b2a3-ec53-428f-a2c3-a49a0bde0e0d",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":211,"activeRecallAnswers":213},[212],"What is a population in research?",[214],"A group of interest for your research",{"id":216,"data":217,"type":25,"maxContentLevel":21,"version":25,"reviews":221},"d7cbd587-cf7f-4704-93cd-5b5d4107907a",{"type":25,"title":218,"contentRole":36,"markdownContent":219,"audioMediaId":220},"Samples","\nIn statistics, you often want to know more about an entire population, but to survey them all would be too expensive, or take too long, so you take a smaller sample instead. \n\n ![Graph](image://0a91f7bf-e218-41c6-ba5f-e18e5d3859ee \"An illustration of a sample\")\n\nA sample is the portion of that population that we gathered data on for our research. As an example, imagine that there are 4 million people in your city, but it is only realistic for you to collect data on 400 people. \n\nYour population is 4 million but those 400 people are your sample. Even though you didn’t get data on everyone in our city, with good sampling and statistics we can reliably make inferences about the population based on what we observe in our sample. 
\n\n","ac4d8768-60a9-4662-9fd2-8b5846de7ce0",[222],{"id":223,"data":224,"type":53,"version":25,"maxContentLevel":21},"78b425c0-a49b-495c-8f2a-9186dfe97ff4",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":225,"binaryCorrect":227,"binaryIncorrect":229},[226],"What is the portion of a population that we gather data on for our research called?",[228],"Sample",[230],"Population",{"id":232,"data":233,"type":25,"maxContentLevel":21,"version":25,"reviews":237},"874e552e-162f-400a-b4df-8573314ee4bc",{"type":25,"title":234,"contentRole":36,"markdownContent":235,"audioMediaId":236},"Observations and Units","\nA population is all the people you are interested in finding something out about. A sample is a small group we take from our population so that we can analyze and test the data we gather from them. \n\nAn observation is the term we use for one data point – meaning one element that we are observing within a sample. It is not to be confused with a unit within that sample – meaning one specific member of the group being observed.\n\n ![Graph](image://53dd3769-c569-4dd1-b977-b93382a07adb \"The observation here would be 'area of residence'\")\n\nImagine you are conducting a study of people’s heights on a dating app – strictly for research purposes, of course. The population would be the group on that app you are interested in – perhaps ‘men’, or ‘women’. \n\nThe sample would be a selection of people within that group who you choose to gather data from. The observation would be the height of each person in that group. 
A unit would be one individual from that group.\n\n","30897824-9a94-4a84-88bc-06123c5a99de",[238],{"id":239,"data":240,"type":53,"version":25,"maxContentLevel":21},"e594c03f-7674-4133-a9d2-02d2bf1f93cf",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":241,"binaryCorrect":243,"binaryIncorrect":245},[242],"What is the term used for one data point being analyzed within a sample?",[244],"Observation",[246],"Unit",{"id":248,"data":249,"type":36,"version":25,"maxContentLevel":21,"pages":251},"bcfb48b2-c737-4cfb-8f70-113a0cd2866c",{"type":36,"title":250},"Ensuring Representative Samples",[252,266],{"id":253,"data":254,"type":25,"maxContentLevel":21,"version":25,"reviews":258},"74c62d2d-d0d9-41ee-86cc-4a08b1daf1cb",{"type":25,"title":255,"contentRole":36,"markdownContent":256,"audioMediaId":257},"Representative samples ","\nA good sample is representative of the population you’re interested in studying and learning more about. \n\nLook in the mirror, now back at me. Now look back in the mirror and then back at me again. A sample is representative when it mirrors or reflects the characteristics of the population which you would like to learn more about. \n\n\n ![Graph](image://699159b3-a86d-4f1c-9745-e80dca0ee357 \"Samples should represent every group within the population being studied\")\n\nWas what you saw in the mirror representative of yourself? Sure it was… but was it representative of everyone in your neighborhood? Well… probably not. \n\nTo get a representative sample you would need to gather lots more people. Ideally your sample is completely representative of the population you are studying, a term known as your ‘target’ population. 
So if you are conducting a study on how young people spend their time, you need a sample that is representative of everyone that fits your definition of ‘young’ – not just people from your immediate environment.\n\n","fed54e7a-c911-4c3c-a471-7fe1c4f8404a",[259],{"id":260,"data":261,"type":53,"version":25,"maxContentLevel":21},"44352025-e319-479a-b44d-b436c8c98fe1",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":262,"activeRecallAnswers":264},[263],"When can a sample be representative of the population you are studying?",[265],"When it mirrors or reflects the characteristics of the population which you would like to learn more about",{"id":267,"data":268,"type":25,"maxContentLevel":21,"version":25,"reviews":272},"2788b1a4-9ac8-4701-9951-7113a746d99b",{"type":25,"title":269,"contentRole":36,"markdownContent":270,"audioMediaId":271},"Generalization for statistics ","\nStatistical generalization means using the results we obtained from a sample and inferring characteristics about a population from those results. \n\nIt’s important for a sample to be representative of its population so that we can generalize results from statistical testing to the population at large. \n\nOtherwise, the population could be too different from our sample, and the pattern or effect we saw in our sample might not exist in the population. 
\n","a6cec959-df9a-4183-8c69-1641010ec2bc",[273],{"id":274,"data":275,"type":53,"version":25,"maxContentLevel":21},"e1ea3eb5-579e-460e-bb20-f6ebcefdd49b",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":276,"activeRecallAnswers":278},[277],"What technique requires using the results from a sample to infer characteristics about a population?",[279],"Statistical generalization",{"id":281,"data":282,"type":36,"version":25,"maxContentLevel":21,"pages":284},"114d6076-78d4-4199-87ff-ab846c531cae",{"type":36,"title":283},"Generalization in Statistics and Data Science",[285,301],{"id":286,"data":287,"type":25,"maxContentLevel":21,"version":25,"reviews":291},"56a203ee-9b4b-4c8b-b475-b56a8466289f",{"type":25,"title":288,"contentRole":36,"markdownContent":289,"audioMediaId":290},"Generalization for data science ","\nGeneralization for data science is how well a machine learning model – the algorithmic recipe you use to create predictions or classifications – adapts to new data it hasn’t seen yet. \n\nThis is because just like we use samples in statistics, we can also consider the data we use to train our machine learning models as a sample. This is because we can’t possibly use all the data that exists for our sample and because machine learning is often about making predictions or classifications on data that doesn’t even exist yet.  \n\nIf your model generalizes well, then the results you see in the real world will closely match the results you saw in training and testing the model. 
\n\n","6a22c80a-f9a3-4277-91f2-e7a16cceb034",[292],{"id":293,"data":294,"type":53,"version":25,"maxContentLevel":21},"c3e800f3-1d74-4307-b55e-ad169c938979",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":295,"binaryCorrect":297,"binaryIncorrect":299},[296],"What is the goal of generalization for data science?",[298],"To make predictions or classifications on data that doesn't exist yet.",[300],"To use all the data that exists for a sample.",{"id":302,"data":303,"type":25,"maxContentLevel":21,"version":25,"reviews":307},"ad483acb-7a31-44ac-8bd4-d9148263efa6",{"type":25,"title":304,"contentRole":36,"markdownContent":305,"audioMediaId":306},"How samples can be used for training data models","\nIn order to create accurate models using data science, it's important to carefully select and prepare your data. Generally, you'll want to start by creating a training dataset. This dataset will contain the samples of data that your model will learn from. \n\nBy analyzing these samples, your model will learn to identify patterns and make predictions. But to ensure your model is accurate, you'll also need to test it against a separate dataset, called the test dataset.\n\nThe test dataset should contain data that wasn't used in the training process. 
Because your dataset is complete, including the correct value for the variable you are predicting, you can compare how accurate your predictions are against the actual observations.\n\nThis process of training and testing is critical to building a model that is both accurate and generalizable.\n","42b679ee-333e-407f-b370-97c23d532d99",[308],{"id":309,"data":310,"type":53,"version":25,"maxContentLevel":21},"99d67401-232c-4c0c-8ca4-294d8c64fed0",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":311,"activeRecallAnswers":313},[312],"What is used to compare the accuracy of a model's predictions against actual observations?",[314],"A test dataset",{"id":316,"data":317,"type":29,"maxContentLevel":21,"version":25,"orbs":320},"90046f31-c6ba-41fe-99c9-9bf89fdc4b25",{"type":29,"title":318,"tagline":319},"Types of Variables ","Build a strong data foundation by developing an understanding of variable types ",[321,425],{"id":322,"data":323,"type":36,"version":25,"maxContentLevel":21,"pages":325},"f2605600-80d6-4443-9f09-9d51caba28cf",{"type":36,"title":324},"Types of Variables",[326,344,369,383,397,411],{"id":327,"data":328,"type":25,"maxContentLevel":21,"version":25,"reviews":332},"aaa56978-cc14-440e-b34c-91822aae781f",{"type":25,"title":329,"contentRole":36,"markdownContent":330,"audioMediaId":331},"Introduction to discrete variables ","\nA discrete variable is something that is counted, but not measured. What could that possibly mean, you might ask? Well, when it comes to things you can measure, like kilograms, you can have a puppy that weighs 6.5 kilograms, but you can’t have 6.5 golden retrievers. \n\n ![Graph](image://a4e47ade-fa30-4424-b1b8-4e7c349f0116 \"Golden retriever puppies\")\n\nWeight is something you measure, as is height or distance, while things like people, the numbers on a die, and puppies are things that you count. Other discrete variables are things like models of car, or even a score on a 10-point scale. 
\n\nWe use discrete variables for counting the frequencies of different categories in the population, or separating our data into groups for comparison and analysis.\n\n","f576ff8b-fb0d-4a90-99cd-695aa9e4bd3e",[333],{"id":334,"data":335,"type":53,"version":25,"maxContentLevel":21},"da605504-4ac3-40aa-98d4-c2c269e71908",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":336,"multiChoiceCorrect":338,"multiChoiceIncorrect":340},[337],"What is an example of a discrete variable?",[339],"Number of units",[341,342,343],"Weight","Height","Distance",{"id":345,"data":346,"type":25,"maxContentLevel":21,"version":25,"reviews":350},"7253b23f-416e-4af6-a2c2-247d99135abb",{"type":25,"title":347,"contentRole":36,"markdownContent":348,"audioMediaId":349},"Continuous variables ","\nContinuous data is measured but not counted. Think of a ruler – that’s a measuring tool and you can use it to measure objects. You could measure the length of the footlong sandwich at your local restaurant to see if it really is as long as they say it is. You might find that it's 0.96547 feet. Or it could be 1.0002 feet. \n\n ![Graph](image://238c3a02-fe8d-4042-bae5-227ddacd796f \"A footlong sandwich. Image: Famartin, CC BY-SA 4.0, via Wikimedia Commons\")\n\nIf you have a really precise measuring tool, you can measure things to very fine degrees, and get a number like 21.2542 centimeters. Or zoom out with a less precise measurement, like 21 centimeters. \n\nThe point is that continuous data can take on a value to any level of precision, depending how accurately you measure it. This is in comparison to count data, for example, counting the number of new customers your business attracted this month – you just can’t have 21.2542 people. 
\n\nOther examples of continuous data include things such as height, weight, the battery percentage on your phone, and how far you have to travel to work.\n\n","8b48c939-b677-4bbe-9eae-700f077099b8",[351,358],{"id":352,"data":353,"type":53,"version":25,"maxContentLevel":21},"d69ec654-7005-402a-867f-2a7a4540ac81",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":354,"activeRecallAnswers":356},[355],"What type of data is measured but not counted?",[357],"Continuous data",{"id":359,"data":360,"type":53,"version":25,"maxContentLevel":21},"f02a2a7d-eace-4db2-a494-cc2f6939eec2",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":361,"multiChoiceCorrect":363,"multiChoiceIncorrect":365},[362],"What is an example of continuous data?",[364],"The length of a footlong sandwich",[366,367,368],"Items on the menu","Shops in an area","Daily customers",{"id":370,"data":371,"type":25,"maxContentLevel":21,"version":25,"reviews":375},"9cfef123-b8e4-4c41-b9e6-d74773aaa3af",{"type":25,"title":372,"contentRole":36,"markdownContent":373,"audioMediaId":374},"Ordinal categorical variables versus discrete numeric variables","\nSometimes you might get confused between what is an ordinal categorical variable and what is a discrete numeric variable. \n\nCommon examples of ordinal categorical variables include things like star ratings. There’s no universally understood difference between one star and two stars. Is five stars necessarily five times as good as one star? Would you only pay twice as much for a restaurant with five stars as one with only two-and-a-half stars?\n\nOn the other hand, when it comes to discrete numeric variables, often called count data, we know that two cats is twice as many as one cat. We also know that the difference between three cats and four cats is the same as the difference between eight cats and nine cats. \n\nNot all ordinal categorical variables are numeric, though. 
Consider for example things like education level, where we have the categories ranked in a clear order like ‘primary school’, ‘high school’, ‘college’. The data has a hierarchy, but the difference between each stage isn’t uniform or knowable, just like with our five star rating example. \n","e178cdbf-b35c-4080-aa86-896d1467d7c8",[376],{"id":377,"data":378,"type":53,"version":25,"maxContentLevel":21},"13096af5-5fc8-4583-8a1d-45b8bea9201e",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":379,"activeRecallAnswers":381},[380],"What is the difference between ordinal categorical variables and discrete numeric variables?",[382],"Discrete numeric variables have a uniform difference between each stage, while ordinal categorical variables do not.",{"id":384,"data":385,"type":25,"maxContentLevel":21,"version":25,"reviews":389},"9de5a8b1-da6f-4a26-a21b-b57298337acb",{"type":25,"title":386,"contentRole":36,"markdownContent":387,"audioMediaId":388},"Nominal data ","\nNominal data is categorical data, like your preferred mode of transport, be it bus, car, or bicycle, that cannot be ordered in a meaningful way. As a counterexample, the numbers on each side of a die can be ordered – 3 is greater than 2 and 2 is greater than one. \n\nSure, you could say that a bus is greater than a car and a car is greater than a bicycle, because each differs greatly in size – but it doesn’t work that way with data. While they appear bigger to you, they’re just categories of something. Now if you weighed them or took the volume of them then that’s another story, but then you’re dealing with continuous data. \n\nFor an easier example, let’s take the colors of cars. Blue is not greater than red, which is not greater than green. So you see how they’re all the same, but different. You can’t order them in any objectively meaningful way. \n\n ![Graph](image://3945a0ba-76a9-44aa-af86-b58a5eaf46d1 \"A red Ferrari\")\n\nActually ‘nominal’ comes from ‘name’. 
So, you name the things to help you remember and make it easier to identify. Like when the police are chasing a red Ferrari. ‘Red’ and ‘Ferrari’ are both examples of nominal data. Good luck catching it though!\n\n","530f4caf-6ab5-41b3-9462-f6ea7fe5c4fc",[390],{"id":391,"data":392,"type":53,"version":25,"maxContentLevel":21},"e2008f4e-927f-4222-b13c-a6bb5124109c",{"type":53,"reviewType":70,"spacingBehaviour":25,"clozeQuestion":393,"clozeWords":395},[394],"Nominal data is categorical data that cannot be ordered in a meaningful way.",[396],"ordered",{"id":398,"data":399,"type":25,"maxContentLevel":21,"version":25,"reviews":403},"cbbadb19-4c7e-4126-b56f-7c53ce39af76",{"type":25,"title":400,"contentRole":36,"markdownContent":401,"audioMediaId":402},"Categorical Variables","\nCategorical variables represent groups – for example classifications like brand of car, and hierarchical rankings like educational level. You might also hear categorical variables referred to as ‘qualitative variables’, as opposed to ‘quantitative variables’ which are continuous variables. \n\nTypes of categorical variables include ordinal data, nominal data, and binary data. With categorical variables we can organize our data into groups so that we can compare them between one another. As an example, grouping each product that you sell and plotting the revenue that it brings in with a pie chart. 
\n\nCategorical variables like gender or age group are commonly used in statistical analysis, enabling comparison and testing of differences between groups for things like medical interventions and more.\n","657f06cb-d81e-4796-892c-e479c82e6402",[404],{"id":405,"data":406,"type":53,"version":25,"maxContentLevel":21},"621f9a0f-c9f6-4ab2-a1b1-2ed50de31a5f",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":407,"activeRecallAnswers":409},[408],"What are categorical variables also known as?",[410],"Qualitative variables",{"id":412,"data":413,"type":25,"maxContentLevel":21,"version":25,"reviews":417},"306743a1-9f3d-4471-a360-89805a5067f3",{"type":25,"title":414,"contentRole":36,"markdownContent":415,"audioMediaId":416},"Ordinal variables","\nAs we already discussed, ordinal variables are categorical variables, but you can also order them meaningfully, often from high to low. Put simply, they have relative value but no value of their own accord.  \n\nOne example everyone will be familiar with is ‘education level’. Sure, the names for the different stages of school differ all around the world, but there is one common theme – each stage is ‘higher’ than the previous stage. For example, primary school, middle school, and high school. \n\n ![Graph](image://452e58b5-8350-42f0-b115-fc8d346fa936 \"Relative opinions on chocolate milk would be an ordinal variable\")\n\nAnother example is if I were to ask you how much you like chocolate milk. I could give you the options ‘extremely dislike’, ‘dislike’, ‘neutral’, ‘like’, and ‘extremely like’. Well, that’s ordinal data too, because it tells me how much you like something. \n\nSo I can say that you like chocolate milk more than, or less than, my neighbor Jenny. However, if I said that ‘green car more than blue car’ my words would not convey a clear meaning. 
That’s the difference between ordinal and nominal variables; ordinal values tell you about the relative qualities of units, whereas nominal ones only tell you about a quality of that unit.\n\n\n","e2ad0246-5832-475d-98ce-5c8df2bb615e",[418],{"id":419,"data":420,"type":53,"version":25,"maxContentLevel":21},"20b23c38-b01b-4097-9990-95f514014844",{"type":53,"reviewType":70,"spacingBehaviour":25,"clozeQuestion":421,"clozeWords":423},[422],"Ordinal variables have relative value and can be used to compare one another.",[424],"one another",{"id":426,"data":427,"type":36,"version":25,"maxContentLevel":21,"pages":429},"3b3e9fcd-f177-44a3-9abd-5534932e8d40",{"type":36,"title":428},"Independent and Dependent Variables",[430,454,467,482],{"id":431,"data":432,"type":25,"maxContentLevel":21,"version":25,"reviews":436},"5a08c0ff-f30f-49c1-8d9c-7deebfe47ebe",{"type":25,"title":433,"contentRole":36,"markdownContent":434,"audioMediaId":435},"Independent and Dependent Variables ","\nWhen a statistician conducts a research experiment, they are typically interested in whether an independent variable will influence a dependent variable. An independent variable is a value that does not depend on any other value, and a dependent variable is a value that changes depending on how the independent variable changes. \n\nFor example, in a study on the effect of studying on test scores, the independent variable would be the amount of time spent studying, and the dependent variable would be the test scores. The researchers intentionally manipulate the independent variable – say by making people study more or less – and see how it impacts the dependent variable – in this case their test scores. 
\n","7c214a4a-8283-478b-9754-28f64926b446",[437,448],{"id":438,"data":439,"type":53,"version":25,"maxContentLevel":21},"aeacb7f1-83a4-4bee-a8b0-5769557ad724",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":440,"multiChoiceCorrect":442,"multiChoiceIncorrect":444},[441],"What is the value that is intentionally manipulated in a research experiment?",[443],"Independent variable",[445,446,447],"Dependent variable","Ordinal variable","Categorical variable",{"id":449,"data":450,"type":53,"version":25,"maxContentLevel":21},"ea1a81d2-f119-47fd-8e3d-eea4e735d02f",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":451,"activeRecallAnswers":453},[452],"What is a value that changes depending on how the independent variable changes?",[445],{"id":455,"data":456,"type":25,"maxContentLevel":21,"version":25,"reviews":460},"22157a3f-8ba9-48c4-afa0-13a833283843",{"type":25,"title":457,"contentRole":36,"markdownContent":458,"audioMediaId":459},"Independent vs Dependent","\nAn independent variable is the cause. It is what influences the dependent variable, and it is independent of any other variables in your study. \n\n ![Graph](image://43d291f3-95ee-4001-b5f0-657f77a035bb \"A chicken roasting in the oven\")\n\nThe next time you are cooking something, consider this: the temperature you use is the independent variable, and cooking time is the dependent variable – the result of your manipulation of the independent variable. \n\nOnce the oven's preheated, your cooking time cannot in and of itself have an effect on the temperature of the oven. 
However, the temperature you choose will have an effect on how long you need to cook your food for.\n\n","982cbcb5-3c7c-488c-9dad-431a5cc126cd",[461],{"id":462,"data":463,"type":53,"version":25,"maxContentLevel":21},"99f3edba-f15c-4aa9-8d41-276737f285f3",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":464,"activeRecallAnswers":466},[465],"What is the term for the variable that is manipulated to influence the dependent variable?",[443],{"id":468,"data":469,"type":25,"maxContentLevel":21,"version":25,"reviews":473},"38137aca-2eef-49d9-8c73-2037f7bd5d06",{"type":25,"title":470,"contentRole":36,"markdownContent":471,"audioMediaId":472},"Identifying Independent Variables ","\nIndependent variables can be identified by meeting these two criteria. \n\nOne – it comes before the other variable in time, for example, students take a new brain enhancing energy drink before a test. \n\n ![Graph](image://d25e4f11-fabd-4d59-9762-1fa26ca2ae8e \"The variable that occurs earlier in time is usually the independent variable\")\n\nTwo – the variable is manipulated in some way, or used as a method for grouping by the researcher. Consider for example that we either adjusted the dose of the new energy drink – which means we manipulated it – or we administered it to some students but not others – in which case we used it as a grouping method.\n\nThe goal of this research is to find out how this variable influences another variable. In our case, whether the energy drink influences test scores. 
\n\n","3c8a1199-e062-4492-bff8-9f9ea37968d2",[474],{"id":475,"data":476,"type":53,"version":25,"maxContentLevel":21},"fc90ea73-d868-48ee-b620-545b88fd3be2",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":477,"activeRecallAnswers":479},[478],"How can independent variables be identified?",[480,481],"It comes before the other variable in time","It is manipulated in some way by the researcher",{"id":483,"data":484,"type":25,"maxContentLevel":21,"version":25,"reviews":488},"1af07c20-ed32-49fb-85bb-e5ac6c68966d",{"type":25,"title":485,"contentRole":36,"markdownContent":486,"audioMediaId":487},"Identifying Dependent Variables ","\n ![Graph](image://1b62c8ec-1251-42e7-9bd1-9c84537afa5b \"Independent vs dependent variables\")\n\nThere are three simple criteria to check whether you are dealing with a dependent variable. \n\nOne – is this variable considered an outcome in your study? As an example, do you expect to find higher employee engagement as a result of your new company culture change initiative? Then employee engagement is an outcome. \n\nTwo – is this variable dependent on other variables in your study? In our case, employee engagement is dependent on the company culture change initiative. \n\nThree – this variable gets measured after a change or manipulation is made to another variable. In the case of employee engagement, you want to test if it improves. So you measure it before you administer the culture change initiative. But you also measure it after. 
\n\n","bf387476-af6e-4f48-b165-138609af0907",[489],{"id":490,"data":491,"type":53,"version":25,"maxContentLevel":21},"927047ef-6398-43f6-87f9-183af5881bcb",{"type":53,"reviewType":70,"spacingBehaviour":25,"clozeQuestion":492,"clozeWords":494},[493],"A variable is dependent if it is considered an outcome in your study.",[495],"outcome",{"id":497,"data":498,"type":29,"maxContentLevel":21,"version":36,"orbs":501},"24e62eac-27a5-4118-b607-98752c31a498",{"type":29,"title":499,"tagline":500},"Data Exploration ","Uncover hidden gems in your data, and learn how to visualize relationships between variables",[502,555,606],{"id":503,"data":504,"type":36,"version":25,"maxContentLevel":21,"pages":506},"561a9002-3464-40d1-8ddf-9cb63898489e",{"type":36,"title":505},"Introduction to EDA",[507,521,539],{"id":508,"data":509,"type":25,"maxContentLevel":21,"version":25,"reviews":513},"56cd4a98-bd16-44e2-a962-0a0e25b5ae9a",{"type":25,"title":510,"contentRole":36,"markdownContent":511,"audioMediaId":512},"What is Exploratory Data Analysis? ","\nExploratory Data Analysis (EDA) involves locating and correcting missing values, checking relationships between variables, and extracting the most important or relevant variables that you will use for later statistical analysis or machine learning models. \n\n\n\n ![Graph](image://dc0afba1-9255-48ca-82e8-7571da1cdd5a \"EDA helps get an initial impression of the data. Image: RCraig09, CC BY-SA 4.0, via Wikimedia Commons\")\n\nWhen conducting EDA, you will do things like create graphs and plots to see the distribution of continuous variables, or locate and potentially remove things like outliers in your dataset, which are values that are different from your average or typical observations and that can affect your statistical test results or machine learning models. 
\n\n","b28af3f8-d495-413c-a602-a291a5472d45",[514],{"id":515,"data":516,"type":53,"version":25,"maxContentLevel":21},"481ccdd6-ff33-4e02-bf9c-8953d8687ca0",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":517,"activeRecallAnswers":519},[518],"What is the acronym for the process of locating and correcting missing values and checking relationships between variables?",[520],"EDA (Exploratory Data Analysis)",{"id":522,"data":523,"type":25,"maxContentLevel":21,"version":25,"reviews":527},"4c1adfb3-4714-47aa-b9c9-e40d25e0cc06",{"type":25,"title":524,"contentRole":36,"markdownContent":525,"audioMediaId":526},"What is the goal of exploratory Data Analysis? ","\nThe goal of exploratory data analysis is to mine golden nuggets of insight from your dataset, as well as minimize any potential error that your dataset might cause when it comes to running statistical tests or passing it into machine learning models. \n\n ![Graph](image://ee64a7b2-2a1a-4c62-93a5-897f0a773cf0 \"Watch out for bad data in your model\")\n\nIf building a machine learning model is like making a fruit salad, you don’t want rotten fruit, in this case bad data, in your salad. You also don’t want the wrong ingredients, like tuna, in your fruit salad. These would be the wrong data points altogether. \n\nAs we say when it comes to data science, ‘garbage in, garbage out’, which means if you don’t properly explore, clean, and select your variables, which are your inputs, your model performance, which is your output, will suffer. 
\n","4922a5d8-c658-4331-888a-6a8e2830bb46",[528],{"id":529,"data":530,"type":53,"version":25,"maxContentLevel":21},"e7d5f3e8-734b-461f-ba66-25e71b191061",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":531,"multiChoiceCorrect":533,"multiChoiceIncorrect":535},[532],"What is the saying used to describe the importance of data quality when it comes to building machine learning models?",[534],"Garbage in, garbage out",[536,537,538],"Quality in, quality out","Data in, data out","Inputs in, outputs out",{"id":540,"data":541,"type":25,"maxContentLevel":21,"version":25,"reviews":545},"2045152e-0fbc-4eec-8cd7-092422be6353",{"type":25,"title":542,"contentRole":36,"markdownContent":543,"audioMediaId":544},"Data Cleaning","\nData cleaning is an essential process in any statistics, data analytics, or data science workflow. You may also hear it referred to as data cleansing or data scrubbing. \n\nDuring data cleaning, you fix or remove all kinds of data, to give yourself a clean dataset to work with. Examples of data that are problematic and need cleaning are incorrect data, missing values, duplicate values, or incorrectly formatted data. \n\nYou need to clean your data because if it is incorrect, the results from your statistical tests or machine learning algorithms will be unreliable.\n\nDuring data cleaning, you may also check for and remove outliers. These are values that are really far away from the mean. They might be due to measurement error, or you just happened to get a freakishly tall person in your small sample, so it might be best to remove them, otherwise your results might not generalize to the population. 
\n\nIt is commonly estimated that 80% of time during data science workflows is spent on data cleaning.\n","d86d8414-fa76-41a7-9a25-e180b95f9b88",[546],{"id":547,"data":548,"type":53,"version":25,"maxContentLevel":21},"4482b579-5fcd-4acc-828f-7aaa29160771",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":549,"binaryCorrect":551,"binaryIncorrect":553},[550],"80% of time in a data science workflow is spent doing what?",[552],"Data cleaning",[554],"Data modelling",{"id":556,"data":557,"type":36,"version":25,"maxContentLevel":21,"pages":559},"6df754e8-b55e-49d0-92e1-50bc09060754",{"type":36,"title":558},"Techniques in EDA",[560,574,592],{"id":561,"data":562,"type":25,"maxContentLevel":21,"version":25,"reviews":566},"4b784e0a-23e2-4530-8893-c46f05af9d78",{"type":25,"title":563,"contentRole":36,"markdownContent":564,"audioMediaId":565},"Descriptive Statistics ","\nDescriptive statistics take all of your complex data and provide you with a simple and easy way to understand the most relevant points. For your continuous variables, you will be able to quickly see the average for each variable, the minimum and maximum values, and how many observations you have in your dataset for each variable.\n\nWith descriptive statistics, you can succinctly describe your dataset in a way that enables comparison between variables and other datasets. And while it won’t enable you to draw statistically significant conclusions, it will enable you to gain insights into where you should explore further. \n\nDescriptive statistics will show you things like the mean, median, or mode for your data. Alternatively, they might show how spread out your data is through things like the variance or standard deviation. 
\n\nDescriptive statistics can also show you the shape of your data, giving you an idea of whether it clusters around an average result, or whether it has a different distribution, which might cause problems for some statistical tests.\n","0f32928f-af93-4281-abcd-8bf72893b913",[567],{"id":568,"data":569,"type":53,"version":25,"maxContentLevel":21},"fb1a9a23-936b-4487-80b5-13159767243f",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":570,"activeRecallAnswers":572},[571],"What type of statistics can be used to succinctly describe a dataset and enable comparison between variables and other datasets?",[573],"Descriptive statistics",{"id":575,"data":576,"type":25,"maxContentLevel":21,"version":25,"reviews":580},"8f6d71f4-b780-44d5-9c72-c013e7d581e9",{"type":25,"title":577,"contentRole":36,"markdownContent":578,"audioMediaId":579},"Graphical analysis","\nMuch like descriptive statistics enable you to get an overview of your data with simple numerical summaries, graphical analysis enables you to get a visual overview of your data. \n\nGraphical analysis is an important first step in data analysis, statistics, and data science. This is because without looking at them, you don’t really know what ingredients you’re putting in your fruit salad, and you might reach for the hot chilli if you’re cooking in the dark. \n\nVisual representations you can use include box plots, bar charts, pie charts, scatter plots, and histograms. 
\n","5baed626-f209-4bcf-bdf7-bb06e9420083",[581],{"id":582,"data":583,"type":53,"version":25,"maxContentLevel":21},"763df6f6-54df-4fef-b1ad-f31e01731a82",{"type":53,"reviewType":25,"spacingBehaviour":25,"activeRecallQuestion":584,"activeRecallAnswers":586},[585],"What are the five most common types of visual representation?",[587,588,589,590,591],"Box plots","Bar charts","Pie charts","Scatter plots","Histograms",{"id":593,"data":594,"type":25,"maxContentLevel":21,"version":25,"reviews":598},"c2a78677-25a3-4bf4-a150-85703d18a043",{"type":25,"title":595,"contentRole":36,"markdownContent":596,"audioMediaId":597},"Spotting errors in your nominal data ","Nominal categorical variables require special attention when conducting exploratory data analysis, because you cannot calculate the mean or standard deviation, nor the minimum and maximum. This is because nominal categorical variables are things like “car model” or “dog breed”. \n\nSo how do you conduct EDA for nominal variables? What should you look for? The first thing to do is check all of your unique values. Imagine that you’re doing a survey of what pets people have and you get these answers: \n\n{“Cat”, “cat”, “Dog”, “parrot”, “Tesla”, nan} \n\nYou have a few problems. Well, actually, you have a lot of problems. \n\n ![Graph](image://09f55874-1aed-437c-9e00-e08624cfef2c \"Ideally your data will contain just the correct values\")\n\nFirst, you have both “Cat” and “cat” as options. Presumably these are the same thing. So you need to transform all instances of “cat” to “Cat” so that the two will rightfully be grouped together in any analyses. \n\nNext, you have a “Tesla” in with your animals. Assuming Elon Musk has not started creating self-walking Robot Dogs – though we don’t exclude the possibility – that means there’s some mistaken data. \n\nLastly, you have a missing value which in many statistical programs will show as ‘NaN’ or ‘null’. 
\n\n","61c98ff0-983d-48fd-ad83-87e6b698e579",[599],{"id":600,"data":601,"type":53,"version":25,"maxContentLevel":21},"92a705b0-b413-4875-b9d8-6c115ca737af",{"type":53,"reviewType":70,"spacingBehaviour":25,"clozeQuestion":602,"clozeWords":604},[603],"Mistaken data is where an item appears in your data that clearly shouldn't be there.",[605],"mistaken",{"id":607,"data":608,"type":36,"version":36,"maxContentLevel":21,"pages":610},"a8678870-034f-4bf4-8372-c9117fa88087",{"type":36,"title":609},"Handling Nominal Data",[611,625,642,656],{"id":612,"data":613,"type":25,"maxContentLevel":21,"version":36,"reviews":617},"e9c4aadc-2d78-43d0-b600-e249d8adf4f9",{"type":25,"title":614,"contentRole":36,"markdownContent":615,"audioMediaId":616},"Why you can’t calculate the mean for nominal data","It is not possible to calculate the mean, median, or percentile values for categorical variables such as car model, or day of the week. However, for continuous variables like stock price or height, you can calculate those metrics.\n\n![Graph](image://34538d91-aa31-425d-851e-0787591c611e \"Models of car are nominal data\")\n\nEven if a nominal variable is represented numerically, like a zip code, you still can’t calculate mean, median, or percentile values. This is because the average of a zip code number doesn’t mean anything, because they are not logically organised. 
A mean zip code of 7023.5 doesn’t convey any useful information.\n\nThe zip code most commonly found in your dataset does however tell you useful information.","9f6bf594-b30b-4b53-8312-3abe858c408b",[618],{"id":619,"data":620,"type":53,"version":25,"maxContentLevel":21},"b8e0e024-18da-48ca-bde6-9dcdd2a96bf5",{"type":53,"reviewType":70,"spacingBehaviour":25,"clozeQuestion":621,"clozeWords":623},[622],"You can’t calculate the mean, median, or percentile values for nominal variables, even if they are represented numerically.",[624],"nominal",{"id":626,"data":627,"type":25,"maxContentLevel":21,"version":36,"reviews":631},"693639f0-90d2-47b0-9105-c61c8b22f7ea",{"type":25,"title":628,"contentRole":36,"markdownContent":629,"audioMediaId":630},"Exploratory Data analysis for nominal variables","When conducting EDA for a nominal variable, after exploring and cleaning the data, you run a frequency analysis to show you both the gross count for each category, as well as the percentage of the dataset that each category makes up.\n\nThis can give you useful information, like understanding which models of car are most popular for your customers, or where visitors of an international festival have come from.\n\nJust remember that when reporting on information from your frequency analysis, you can only make statements about your data set. You cannot estimate the frequency of different car models in the broader population just from your sample data, for example. 
Exploratory analysis can only draw conclusions about the dataset it is working with.","53d64df1-a95f-4a56-94fd-7f46bdd85f7b",[632],{"id":633,"data":634,"type":53,"version":25,"maxContentLevel":21},"1ede1e0b-62ee-425f-bd82-6c37bd639c90",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":635,"multiChoiceCorrect":637,"multiChoiceIncorrect":639},[636],"What type of analysis can be used to understand the percentage of a dataset that each category makes up?",[638],"Frequency analysis",[640,641,139],"Regression analysis","Correlation analysis",{"id":643,"data":644,"type":25,"maxContentLevel":21,"version":36,"reviews":647},"4b877382-3701-45b1-b1ef-36992f1c12c6",{"type":25,"title":414,"contentRole":36,"markdownContent":645,"audioMediaId":646},"During analysis, it’s important not to fall into the trap of treating ordinal variables such as the numbers on a die exactly as you would treat a continuous variable like height.\n\n![Graph](image://b45bb8a5-762e-4fc5-9de4-d0f610794b69 \"Numbers on dice are ordinal variables\")\n\nAs an example, let’s say you have a die, and you’re playing a game with your friends. You want to know which number you are most likely to roll. Maybe you suspect that the die is loaded, and your friend has been cheating.\n\nSo you roll the die lots of times, and record each value. Now you need to check which value is most common.\n\nDo you calculate the mean? No, because that doesn’t tell you which value is most common, it tells you the average. Moreover, it will give you a decimal value, like 2.5, which is not a number that exists on the die.\n\nSo while a dice roll can be represented numerically, you can’t treat it like continuous data.\n\nThe analysis you need is a frequency analysis, which counts how many times each discrete value was seen in your dataset. 
If you see the number 6 in 60 of your 120 rolls, equivalent to 50% of the time, then you might start to get suspicious that the die is in fact loaded, because you would expect to see it in only about 20 rolls, or 16.67% of the time.","134f0019-c473-4da3-bfea-af7f9d6d2143",[648],{"id":649,"data":650,"type":53,"version":25,"maxContentLevel":21},"6cd6fcd4-4fef-469b-91a5-d731bae1a233",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":651,"multiChoiceCorrect":653,"multiChoiceIncorrect":654},[652],"What type of analysis should be used to determine which number is most common when rolling a die?",[638],[640,655,139],"Exploratory analysis",{"id":657,"data":658,"type":25,"maxContentLevel":21,"version":36,"reviews":662},"f5ee8d20-1a44-4d82-a6cd-2444fe028e2c",{"type":25,"title":659,"contentRole":36,"markdownContent":660,"audioMediaId":661},"Presenting count data as a rate","There are instances where you will be able to present your frequencies of observations as useful summary statistics in the form of rates. Let’s look at a grim example: the murder rate, which is sometimes called the homicide rate.\n\n![Graph](image://fc6b5ef0-cbc3-40da-9ca0-6db3ac71a2b2 \"A graph demonstrating homicide rates\")\n\nThe homicide rate is calculated as follows: (Number of murders / total population)\\*100,000\n\nWhy is it multiplied by 100,000? This is because that gives us the number of murders per 100,000 people. You could also choose a different number for your data, but for homicides we normally use 100,000.\n\nWhile you input into this equation a discrete ordinal variable, the number of murders, the result is a rate, for example “0.2 murders per 100,000 people”. 
This happens to be the homicide rate in Japan as of 2022, which is the safest country in the world when it comes to this statistic.","1a4b9a6b-5fd1-449d-a7b8-1b27eaaf8377",[663],{"id":664,"data":665,"type":53,"version":36,"maxContentLevel":21},"f7b40bf4-4977-47a2-b4fe-f9e7929f746b",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":666,"binaryCorrect":668,"binaryIncorrect":670},[667],"The homicide rate per 100,000 people would be the number of murders/total population, multiplied by...",[669],"100,000",[671],"10,000",{"id":673,"data":674,"type":29,"maxContentLevel":21,"version":25,"orbs":677},"40a2de83-0088-46b0-b158-981298696af2",{"type":29,"title":675,"tagline":676},"Representations of Data","Learn how to visually summarise your data for analysis and reporting",[678,714,757,805],{"id":679,"data":680,"type":36,"version":25,"maxContentLevel":21,"pages":682},"ec2af0a2-5793-4420-90bf-c28f359fa3d9",{"type":36,"title":681},"Understanding Histograms",[683,698],{"id":684,"data":685,"type":25,"maxContentLevel":21,"version":25,"reviews":688},"71a4907f-574f-4e49-9b79-571eed0314b5",{"type":25,"title":591,"contentRole":36,"markdownContent":686,"audioMediaId":687},"\nHistograms are used to show us the distribution of continuous variables in our samples. It helps you visualize the frequency of your continuous data. Each observation in your sample should have a numerical value attached to it. \n\n ![Graph](image://7388046d-dd53-4857-b687-9cff1d571671 \"A typical histogram\")\n\nFor example, a histogram could show the weight of each dog in your neighborhood. 
The weight is a continuous variable, each dog is an observation in your sample, and the neighborhood is your population of interest.\n\n","b5dd11f5-df1c-46ec-9d93-0f2f713c6011",[689],{"id":690,"data":691,"type":53,"version":25,"maxContentLevel":21},"eea69a0f-64f7-4a24-9773-d1a14a97d8c8",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":692,"binaryCorrect":694,"binaryIncorrect":696},[693],"What type of graph is usually used to show the distribution of continuous variables in a sample?",[695],"Histogram",[697],"Pie Chart",{"id":699,"data":700,"type":25,"maxContentLevel":21,"version":25,"reviews":704},"b5ced7e2-814b-4e70-b9fe-442992697bb2",{"type":25,"title":701,"contentRole":36,"markdownContent":702,"audioMediaId":703},"How does a histogram work?","\nA histogram groups your data into buckets and counts how many of your observations fell into each bucket. First, you set ranges for groups. They might be 0-5kg, 5-10kg, 10-15kg and so on. Then you count the number of dogs that had a weight within that range. \n\nThese buckets need not be of equal width. If they are, then the number on the y-axis is equal to the frequency. If they are not, then the number on the y-axis is not the raw frequency, but the frequency density. Keep that in mind when interpreting your histograms.\n\nWith a histogram you can easily visualize the mean and median of the data, how spread out the data is, and even whether it seems to be normally distributed, or skewed. These are all very important things you need to know about your data! 
\n","04afb609-df53-4180-afde-679ea3aa8752",[705],{"id":706,"data":707,"type":53,"version":25,"maxContentLevel":21},"190929c3-fc5c-48ed-b060-8f8d97cd3844",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":708,"binaryCorrect":710,"binaryIncorrect":712},[709],"What does a histogram allow us to easily visualize?",[711],"Averages from your data",[713],"The distribution of outliers",{"id":715,"data":716,"type":36,"version":25,"maxContentLevel":21,"pages":718},"0d826ba5-2004-4c50-a7c8-8e2549490679",{"type":36,"title":717},"Understanding Bar Charts",[719,724,739],{"id":720,"data":721,"type":25,"maxContentLevel":21,"version":25},"34bd61fe-4e50-462d-9181-9e06e4978fe2",{"type":25,"title":588,"contentRole":36,"markdownContent":722,"audioMediaId":723},"\nBar charts are the distant cousin of the histogram. However, bar charts are used for categorical data like the number of ginger cats in your neighborhood, and not continuous data like their weight.\n\n ![Graph](image://aea3b026-7363-4063-a906-9447df826076 \"Two bar charts\")\n\nBar charts are a nice visual way to represent the frequencies of each category, for our categorical data, meaning how many times they occurred, in your dataset. You can see which categories were most commonly found, and which were least common in your data. And you can see how much of each there was. \n\nYour data can be either nominal – where there’s no hierarchy, like car color – or ordinal – where there is a hierarchy, like educational attainment.\n\n","2c475f65-a4bb-444e-bc7d-787b420b8eb3",{"id":725,"data":726,"type":25,"maxContentLevel":21,"version":25,"reviews":730},"db98ea64-63a2-4f83-8d6c-b00ad795c258",{"type":25,"title":727,"contentRole":36,"markdownContent":728,"audioMediaId":729},"Horizontal bar charts ","Generally, on a bar chart the y-axis shows you how many observations you counted within that category. On the x-axis are the different categories in your data. 
\n\nHowever, you can also have it the opposite way around to make a horizontal bar chart. This is something you can’t do for a histogram, which is used for continuous data, not categorical data. While in a histogram, you can only present the frequency counts on the y-axis, both horizontal and vertical work with a bar chart.\n","2c5f5385-e4ce-4d0b-9f1a-f173f13c2348",[731],{"id":732,"data":733,"type":53,"version":25,"maxContentLevel":21},"932d15ac-f55d-4f6e-a73d-6a214f784079",{"type":53,"reviewType":70,"spacingBehaviour":25,"clozeQuestion":734,"clozeWords":736},[735],"A bar chart can be used to represent categorical data, while a histogram is used for continuous data.",[737,738],"categorical","continuous",{"id":740,"data":741,"type":25,"maxContentLevel":21,"version":25,"reviews":745},"cfad6dbe-4634-43bc-8315-6d884ef9529b",{"type":25,"title":742,"contentRole":36,"markdownContent":743,"audioMediaId":744},"Differences between bar charts and histograms ","\nBar charts are used to count frequencies of things, like the number of blue, red, and white cars you see on the highway. A histogram doesn’t count members of a category in the same way that a bar chart does. It counts observations that have been measured first, like the weight of each dog in your neighborhood. \n\nYou’ll only ever see a gap in a histogram if there’s no observations counted for that range. Otherwise, the bars of a histogram are always right up against one another and always vertical. \n\nValues on a histogram are always ordered from lowest to highest. 
On the other hand, the bars on a bar chart can be ordered any way you please.\n","177f0368-2a5d-4231-94d3-c3c115c68b6a",[746],{"id":747,"data":748,"type":53,"version":25,"maxContentLevel":21},"37037c6a-ec04-4de3-bf89-ce25176a6960",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":749,"multiChoiceCorrect":751,"multiChoiceIncorrect":753},[750],"How are the bars of a histogram typically ordered?",[752],"From lowest to highest",[754,755,756],"Alphabetically","From highest to lowest","Randomly",{"id":758,"data":759,"type":36,"version":25,"maxContentLevel":21,"pages":761},"4884a71b-95bf-4b23-8191-dbed7b121424",{"type":36,"title":760},"Interpreting Boxplots",[762,787],{"id":763,"data":764,"type":25,"maxContentLevel":21,"version":25,"reviews":768},"01cf1e66-d35f-4132-9bba-296288469e99",{"type":25,"title":765,"contentRole":36,"markdownContent":766,"audioMediaId":767},"The basics of boxplots","\n ![Graph](image://514b1529-80a7-4120-b96d-0ed90161de14 \"A boxplot chart with the curve illustrated\")\n\n\nA boxplot is a type of chart that is often used in data science to visually display a dataset's distribution. It consists of a \"box\" that is defined by the upper and lower quartiles of the dataset. \n\nThe \"whiskers\" extending from the box represent the range of the data, while the line through the center of the box represents the median. The Interquartile Range (IQR) is the distance between the upper and lower quartiles.\n\nOne of the key things that boxplots are used for is identifying outliers. Outliers are data points that are unusually high or low compared to the rest of the dataset. 
Boxplots usually depict outliers as points outside the whiskers, and it's important to take note of them as they can skew your analysis.\n\n","287da744-963d-42d9-9826-51651fdab73d",[769,780],{"id":770,"data":771,"type":53,"version":25,"maxContentLevel":21},"45b9db78-53ba-4a5f-a8c4-c579751c9f48",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":772,"multiChoiceCorrect":774,"multiChoiceIncorrect":776},[773],"What is the Interquartile Range (IQR) in a boxplot?",[775],"The distance between the upper and lower quartiles",[777,778,779],"The distance between the upper and lower whiskers","The distance between the median and the upper quartile","The distance between the median and the lower quartile",{"id":781,"data":782,"type":53,"version":25,"maxContentLevel":21},"67a86893-75d0-4b26-a06c-614682c0ad2e",{"type":53,"reviewType":70,"spacingBehaviour":25,"clozeQuestion":783,"clozeWords":785},[784],"Boxplots are used to identify outliers, which are data points that are unusually high or low.",[786],"outliers",{"id":788,"data":789,"type":25,"maxContentLevel":21,"version":25,"reviews":793},"b42a31a0-1f76-43c8-9508-df682f1a37d1",{"type":25,"title":790,"contentRole":36,"markdownContent":791,"audioMediaId":792},"Reading a boxplot ","\n\nWhen reading a boxplot, it's important to pay attention to the different components and what they represent. The median line in the center of the box will tell you the midpoint of the dataset, while the quartiles can tell you how the data is distributed. Within the ‘box’ is the middle 50% of data. \n\n ![Graph](image://c2a74764-f992-469f-a1f4-242278d4f683 \"A boxplot chart\")\n\nTo calculate the whiskers, you'll want to use the Interquartile Range (IQR). Typically, the upper whisker will be located at the smaller of either the maximum data value or Q3 + 1.5(IQR), where Q3 is the upper quartile. 
The lower whisker is typically located at the larger of either the minimum data value or Q1 - 1.5(IQR), where Q1 is the lower quartile.\n\nAs an example, if you have a dataset with a lower quartile of 20, an upper quartile of 30, with the IQR therefore 10, the upper whisker will be located at 45 (30 + 1.5(10)) and the lower whisker will be located at 5 (20 - 1.5(10)). The whiskers are known as the ‘maximum’ and ‘minimum’ points on your graph, but you may still have data points beyond these - they are known as ‘outliers’.\n\n","f7cca13a-c6fd-4d66-9f3c-4bd9fe659b6c",[794],{"id":795,"data":796,"type":53,"version":25,"maxContentLevel":21},"1f317f1b-fdf5-49c7-8446-fe2c9b771210",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":797,"multiChoiceCorrect":799,"multiChoiceIncorrect":801},[798],"What is calculated using IQR in a box plot?",[800],"The whiskers",[802,803,804],"The median","The quartiles","The outliers",{"id":806,"data":807,"type":36,"version":25,"maxContentLevel":21,"pages":809},"1cfc9400-d747-4d48-86ec-48f636334445",{"type":36,"title":808},"Using Scatter Plots",[810,826],{"id":811,"data":812,"type":25,"maxContentLevel":21,"version":25,"reviews":816},"1077f796-a57b-4391-a159-7be6d621a1df",{"type":25,"title":813,"contentRole":36,"markdownContent":814,"audioMediaId":815},"What is a scatter plot ","\nA scatterplot shows us the relationship between two continuous variables. It’s often the first step in visualizing correlations in your data. Correlation is the degree to which two variables are seemingly related, like how long you spend working out at the gym and how many calories you burn.\n\n ![Graph](image://7b4196d3-56cd-4f1a-9f34-85792dcb66ac \"Scatter plot charts\")\n\n But sometimes things can be correlated but unrelated, like ice cream sales and shark attacks. Both happen to increase in summer, but one doesn’t cause the other. 
\n\n","f88a070d-ca90-42df-9813-19ca8ac1a6bf",[817],{"id":818,"data":819,"type":53,"version":25,"maxContentLevel":21},"e88c9f3b-93b3-4d77-b0ef-00faf1f7f421",{"type":53,"reviewType":36,"spacingBehaviour":25,"binaryQuestion":820,"binaryCorrect":822,"binaryIncorrect":824},[821],"What type of graph is used to visualize the relationship between two continuous variables?",[823],"Scatterplot",[825],"Bar graph",{"id":827,"data":828,"type":25,"maxContentLevel":21,"version":25,"reviews":832},"ebaa853a-820f-4710-8a0f-6e0e03fbcac4",{"type":25,"title":829,"contentRole":36,"markdownContent":830,"audioMediaId":831},"When to use a scatter plot","\nWhen should you use a scatter plot? Let’s say we have a sample of observations. For each observation we have two measurements – both should be continuous variables. As an example, we could have data on weight and the amount of swimming time it takes to fatigue. We want to know if lighter mice can swim for longer than heavier mice. \n\nTo plot the data, we use a scatter plot. Each dot represents both the weight of the mouse and minutes spent swimming. If weight is on the X-axis, the horizontal line, then the further to the right the dot is, the more the mouse weighs. And the higher the dot is on the Y-axis, the line pointing vertically, the longer the mouse swims. \n\nAs an aside, mice can swim for a super long time. Some for over ten hours, because they’re naturally very buoyant. 
But please don’t go throwing any into the bathtub.\n","d3f80d1b-5e93-4ae0-a712-420097b1e1de",[833],{"id":834,"data":835,"type":53,"version":25,"maxContentLevel":21},"5ad0dc13-de53-4f9a-983d-920f6ad23793",{"type":53,"reviewType":21,"spacingBehaviour":25,"multiChoiceQuestion":836,"multiChoiceCorrect":838,"multiChoiceIncorrect":840},[837],"What type of graph should be used to plot the data of weight and swimming time for mice?",[839],"Scatter plot",[841,825,842],"Pie chart","Line graph",{"left":4,"top":4,"width":844,"height":844,"rotate":4,"vFlip":6,"hFlip":6,"body":845},24,"\u003Cpath fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\" d=\"m9 18l6-6l-6-6\"/>",{"left":4,"top":4,"width":844,"height":844,"rotate":4,"vFlip":6,"hFlip":6,"body":847},"\u003Cg fill=\"none\" stroke=\"currentColor\" stroke-linecap=\"round\" stroke-linejoin=\"round\" stroke-width=\"2\">\u003Cpath d=\"M12.586 2.586A2 2 0 0 0 11.172 2H4a2 2 0 0 0-2 2v7.172a2 2 0 0 0 .586 1.414l8.704 8.704a2.426 2.426 0 0 0 3.42 0l6.58-6.58a2.426 2.426 0 0 0 0-3.42z\"/>\u003Ccircle cx=\"7.5\" cy=\"7.5\" r=\".5\" fill=\"currentColor\"/>\u003C/g>",1778179495021]