── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0 ✔ purrr 1.0.0
✔ tibble 3.2.1 ✔ dplyr 1.1.2
✔ tidyr 1.2.1 ✔ stringr 1.5.0
✔ readr 2.1.3 ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
Attaching package: 'janitor'
The following objects are masked from 'package:stats':
chisq.test, fisher.test
library (dplyr)
library (tidyr)
UPDATED EDA WITH NEW DATA
Data Collection & Exploratory data analysis
State Allocation vs Crime Rate Analysis 2015
finance <- read.csv ("data/finance.csv" )
crime15 <- read.csv ("data/crime15.csv" )
crime15 |>
mutate (State = if_else (state == "IOWA4" , "IOWA" , state)) |>
mutate (State = if_else (state == "NORTH CAROLINA5" , "NORTH CAROLINA" , state))
state population violent_crime murder_and_nonnegligent_manslaughter
1 ALABAMA 4858979 22952 348
2 ALASKA 738432 5392 59
3 ARIZONA 6828065 28012 309
4 ARKANSAS 2978204 15526 181
5 CALIFORNIA 39144818 166883 1861
6 COLORADO 5456574 17515 176
7 CONNECTICUT 3590886 7845 117
8 DELAWARE 945934 4720 63
9 FLORIDA 20271272 93626 1041
10 GEORGIA 10214860 38643 615
11 HAWAII 1431603 4201 19
12 IDAHO 1654930 3568 32
13 ILLINOIS 12859995 49354 744
14 INDIANA 6619680 25653 373
15 IOWA 3123899 8936 72
16 KANSAS 2911641 11353 128
17 KENTUCKY 4425092 9676 209
18 LOUISIANA 4670724 25208 481
19 MAINE 1329328 1729 23
20 MARYLAND 6006401 27462 516
21 MASSACHUSETTS 6794422 26562 128
22 MICHIGAN 9922576 41231 571
23 MINNESOTA 5489594 13319 133
24 MISSISSIPPI 2992333 8254 259
25 MISSOURI 6083672 30261 502
26 MONTANA 1032949 3611 36
27 NEBRASKA 1896190 5212 62
28 NEVADA 2890845 20118 178
29 NEW HAMPSHIRE 1330608 2652 14
30 NEW JERSEY 8958013 22879 363
31 NEW MEXICO 2085109 13681 117
32 NEW YORK 19795791 75165 609
33 NORTH CAROLINA 10042802 34852 517
34 NORTH DAKOTA 756927 1812 21
35 OHIO 11613423 33898 500
36 OKLAHOMA 3911338 16506 234
37 OREGON 4028977 10468 99
38 PENNSYLVANIA 12802503 40339 658
39 RHODE ISLAND 1056298 2562 29
40 SOUTH CAROLINA 4896146 24700 399
41 SOUTH DAKOTA 858469 3289 32
42 TENNESSEE 6600299 40400 406
43 TEXAS 27469114 113227 1316
44 UTAH 2995919 7071 54
45 VERMONT 626042 739 10
46 VIRGINIA 8382993 16399 383
47 WASHINGTON 7170351 20394 211
48 WEST VIRGINIA 1844128 6231 70
49 WISCONSIN 5771337 17647 240
50 WYOMING 586107 1302 16
rape robbery aggravated_assault property_crime burglary larceny_theft
1 2039 4611 15954 144746 35255 99156
2 901 761 3671 20806 3511 15249
3 3108 6360 18235 207107 37957 152365
4 1931 2098 11316 96836 22640 68424
5 12811 52862 99349 1024914 197404 656517
6 3257 3323 10759 144136 23454 104682
7 773 2892 4063 65066 10053 48675
8 341 1235 3081 25455 4773 19501
9 7553 21137 63895 570270 109268 420341
10 3224 12247 22557 308723 66374 215867
11 561 1203 2418 54346 6557 42010
12 694 192 2650 28858 6124 20863
13 4821 14910 28879 255729 46443 191634
14 2404 7111 15765 171847 34410 123918
15 1156 1047 6661 63957 14892 44723
16 1615 1818 7792 79199 15362 56880
17 1492 3307 4668 96362 22260 66320
18 1723 5550 17454 156629 35453 111435
19 474 311 921 24327 4684 18829
20 1666 9863 15417 139048 25678 100219
21 2075 5288 19071 114871 21890 84912
22 6450 7796 26414 187101 40041 131296
23 2321 3771 7094 121984 19299 94704
24 1203 2294 4498 84790 24799 55748
25 2553 6376 20830 173642 34006 122637
26 547 210 2818 27100 3838 20844
27 873 994 3283 42495 6422 32072
28 1688 6287 11965 77137 22360 43426
29 627 468 1543 23229 3467 18871
30 1373 9729 11414 145701 27960 105963
31 1672 2485 9407 77094 17085 51483
32 6074 23936 44546 317529 44276 257940
33 2684 8825 22826 276183 74841 187907
34 345 148 1298 16020 2997 11440
35 5149 12554 15695 300525 69303 213993
36 1849 3005 11418 112878 28406 74022
37 1593 2146 6630 118719 18336 89836
38 4305 13003 22373 232085 39664 180287
39 459 556 1518 20043 3947 14707
40 2297 3931 18073 161245 34551 113724
41 495 216 2546 16680 2960 12532
42 2676 7474 29844 193796 43247 137679
43 12250 31934 67727 777739 153054 557200
44 1645 1326 4046 89278 12468 68103
45 136 101 492 8806 1968 6660
46 2340 4441 9235 156470 21340 127019
47 2705 5449 12029 248369 50993 170509
48 672 760 4729 37251 9170 25842
49 1780 5232 10395 113924 19554 83385
50 173 59 1054 11151 1762 8797
motor_vehicle_theft State
1 10335 ALABAMA
2 2046 ALASKA
3 16785 ARIZONA
4 5772 ARKANSAS
5 170993 CALIFORNIA
6 16000 COLORADO
7 6338 CONNECTICUT
8 1181 DELAWARE
9 40661 FLORIDA
10 26482 GEORGIA
11 5779 HAWAII
12 1871 IDAHO
13 17652 ILLINOIS
14 13519 INDIANA
15 4342 IOWA
16 6957 KANSAS
17 7782 KENTUCKY
18 9741 LOUISIANA
19 814 MAINE
20 13151 MARYLAND
21 8069 MASSACHUSETTS
22 15764 MICHIGAN
23 7981 MINNESOTA
24 4243 MISSISSIPPI
25 16999 MISSOURI
26 2418 MONTANA
27 4001 NEBRASKA
28 11351 NEVADA
29 891 NEW HAMPSHIRE
30 11778 NEW JERSEY
31 8526 NEW MEXICO
32 15313 NEW YORK
33 13435 NORTH CAROLINA
34 1583 NORTH DAKOTA
35 17229 OHIO
36 10450 OKLAHOMA
37 10547 OREGON
38 12134 PENNSYLVANIA
39 1389 RHODE ISLAND
40 12970 SOUTH CAROLINA
41 1188 SOUTH DAKOTA
42 12870 TENNESSEE
43 67485 TEXAS
44 8707 UTAH
45 178 VERMONT
46 8111 VIRGINIA
47 26867 WASHINGTON
48 2239 WEST VIRGINIA
49 10985 WISCONSIN
50 592 WYOMING
finance1 <- finance |>
filter (Year == 2015 )
finance2015 <- finance1 |>
inner_join (x = crime15, y = finance1, by = c ("state" = "State" )) |>
select (1 : 12 , 23 , 24 , 41 ) |>
mutate (
percap_prison = Details.Correction.Correction.Total / population,
percap_edu = Details.Education.Education.Total / population,
percap_police = Details.Police.protection / population,
percap_violent = violent_crime / population
)
State Allocation vs Crime Rate Analysis 2016
crime16 <- read.csv ("data/crime16.csv" )
crime16 |>
mutate (State = if_else (state == "IOWA4" , "IOWA" , state)) |>
mutate (State = if_else (state == "NORTH CAROLINA5" , "NORTH CAROLINA" , state))
state population violent_crime murder_and_nonnegligent_manslaughter
1 ALABAMA 4863300 25886 407
2 ALASKA 741894 5966 52
3 ARIZONA 6931071 32583 380
4 ARKANSAS 2988248 16461 216
5 CALIFORNIA 39250017 174796 1930
6 COLORADO 5540545 18983 204
7 CONNECTICUT 3576452 8123 78
8 DELAWARE 952065 4844 56
9 FLORIDA 20612439 88700 1111
10 GEORGIA 10310371 40990 681
11 HAWAII 1428557 4417 35
12 IDAHO 1683140 3876 49
13 ILLINOIS 12801539 55854 1054
14 INDIANA 6633053 26845 439
15 IOWA 3134693 9110 71
16 KANSAS 2907289 11060 111
17 KENTUCKY 4436974 10308 260
18 LOUISIANA 4681666 26502 554
19 MAINE 1331479 1648 20
20 MARYLAND 6016447 28400 481
21 MASSACHUSETTS 6811779 25677 134
22 MICHIGAN 9928300 45572 598
23 MINNESOTA 5519952 13394 101
24 MISSISSIPPI 2988726 8383 238
25 MISSOURI 6093000 31644 537
26 MONTANA 1042520 3840 36
27 NEBRASKA 1907116 5550 49
28 NEVADA 2940058 19936 224
29 NEW HAMPSHIRE 1334795 2637 17
30 NEW JERSEY 8944469 21914 372
31 NEW MEXICO 2081015 14619 139
32 NEW YORK 19745289 74285 630
33 NORTH CAROLINA 10146788 37769 678
34 NORTH DAKOTA 757952 1903 15
35 OHIO 11614373 34877 654
36 OKLAHOMA 3923561 17648 245
37 OREGON 4093465 10830 113
38 PENNSYLVANIA 12784227 40447 661
39 RHODE ISLAND 1056426 2524 29
40 SOUTH CAROLINA 4961119 24896 366
41 SOUTH DAKOTA 865454 3621 27
42 TENNESSEE 6651194 42097 486
43 TEXAS 27862596 121042 1478
44 UTAH 3051217 7407 72
45 VERMONT 624594 989 14
46 VIRGINIA 8411808 18302 484
47 WASHINGTON 7288000 22023 195
48 WEST VIRGINIA 1831102 6557 81
49 WISCONSIN 5778708 17679 229
50 WYOMING 585501 1430 20
rape robbery aggravated_assault property_crime burglary larceny_theft
1 1916 4686 18877 143362 34065 97574
2 1053 850 4011 24876 4053 17766
3 3290 7055 21858 206432 37736 150275
4 2143 2120 11982 97673 23771 66747
5 13702 54789 104375 1002070 188304 637010
6 3555 3528 11696 151850 23903 108336
7 763 2703 4579 64664 10045 47512
8 308 1359 3121 26334 5023 19791
9 7598 20175 59816 553812 100325 410352
10 3509 12205 24595 309770 63344 219625
11 619 994 2769 42753 6017 31082
12 719 213 2895 29357 6318 20962
13 4908 17827 32065 262306 47989 194407
14 2501 7330 16575 171759 34097 122931
15 1247 1148 6644 65391 15030 45378
16 1312 1671 7966 78367 14364 57066
17 1641 3369 5038 97158 20834 66438
18 1816 5576 18556 154386 34667 109380
19 412 266 950 21912 4003 17134
20 1756 10289 15874 137445 24692 100919
21 2128 5365 18050 106339 19193 79088
22 7125 7120 30729 189620 39568 129876
23 2348 3728 7217 117756 18606 90422
24 1277 2397 4471 82732 23354 55054
25 2554 6570 21983 170549 31710 120544
26 578 266 2960 27976 3934 21299
27 994 946 3561 43163 6444 31994
28 1733 6340 11639 76047 18850 44017
29 582 427 1611 20194 2963 16360
30 1453 8984 11105 138152 25284 101540
31 1526 2737 10217 81931 17281 52907
32 6260 22316 45079 305181 39821 250968
33 2849 9336 24906 277765 72082 190377
34 342 181 1365 17402 3243 12195
35 5589 12523 16111 299357 66883 212807
36 2039 3162 12202 117037 29103 75779
37 1721 2278 6718 121345 16866 91286
38 4433 12326 23027 222795 35520 174228
39 442 540 1513 20058 3788 14674
40 2387 4035 18108 160928 32976 114032
41 509 272 2813 17141 3000 12639
42 2714 7813 31084 189835 40312 134404
43 13367 33317 72880 768947 148740 551151
44 1520 1541 4274 90058 12836 67834
45 178 106 691 10602 2103 8217
46 2737 4803 10278 156412 20018 126606
47 3077 5651 13100 254653 49180 173187
48 657 720 5099 37487 9301 25677
49 1979 4706 10765 111720 19425 82337
50 205 59 1146 11460 1771 8889
motor_vehicle_theft State
1 11723 ALABAMA
2 3057 ALASKA
3 18421 ARIZONA
4 7155 ARKANSAS
5 176756 CALIFORNIA
6 19611 COLORADO
7 7107 CONNECTICUT
8 1520 DELAWARE
9 43135 FLORIDA
10 26801 GEORGIA
11 5654 HAWAII
12 2077 IDAHO
13 19910 ILLINOIS
14 14731 INDIANA
15 4983 IOWA
16 6937 KANSAS
17 9886 KENTUCKY
18 10339 LOUISIANA
19 775 MAINE
20 11834 MARYLAND
21 8058 MASSACHUSETTS
22 20176 MICHIGAN
23 8728 MINNESOTA
24 4324 MISSISSIPPI
25 18295 MISSOURI
26 2743 MONTANA
27 4725 NEBRASKA
28 13180 NEVADA
29 871 NEW HAMPSHIRE
30 11328 NEW JERSEY
31 11743 NEW MEXICO
32 14392 NEW YORK
33 15306 NORTH CAROLINA
34 1964 NORTH DAKOTA
35 19667 OHIO
36 12155 OKLAHOMA
37 13193 OREGON
38 13047 PENNSYLVANIA
39 1596 RHODE ISLAND
40 13920 SOUTH CAROLINA
41 1502 SOUTH DAKOTA
42 15119 TENNESSEE
43 69056 TEXAS
44 9388 UTAH
45 282 VERMONT
46 9788 VIRGINIA
47 32286 WASHINGTON
48 2509 WEST VIRGINIA
49 9958 WISCONSIN
50 800 WYOMING
finance2 <- finance |>
filter (Year == 2016 )
finance2016 <- finance2 |>
inner_join (x = crime16, y = finance2, by = c ("state" = "State" )) |>
select (1 : 12 , 23 , 24 , 41 ) |>
mutate (
percap_prison = Details.Correction.Correction.Total / population,
percap_edu = Details.Education.Education.Total / population,
percap_police = Details.Police.protection / population,
percap_violent = violent_crime / population
)
State Allocation vs Crime Rate Analysis 2017
crime17 <- read.csv ("data/crime17.csv" )
crime17 |>
mutate (State = if_else (state == "IOWA4" , "IOWA" , state)) |>
mutate (State = if_else (state == "NORTH CAROLINA5" , "NORTH CAROLINA" , state))
state population violent_crime murder_and_nonnegligent_manslaughter
1 ALABAMA 4874747 25551 404
2 ALASKA 739795 6133 62
3 ARIZONA 7016270 35644 416
4 ARKANSAS 3004279 16671 258
5 CALIFORNIA 39536653 177627 1830
6 COLORADO 5607154 20638 221
7 CONNECTICUT 3588184 8180 102
8 DELAWARE 961939 4361 54
9 FLORIDA 20984400 85625 1057
10 GEORGIA 10429379 37258 703
11 HAWAII 1427538 3577 39
12 IDAHO 1716943 3888 32
13 ILLINOIS 12802023 56180 997
14 INDIANA 6666818 26598 397
15 IOWA 3145711 9230 104
16 KANSAS 2913123 12030 160
17 KENTUCKY 4454189 10056 263
18 LOUISIANA 4684333 26092 582
19 MAINE 1335907 1617 23
20 MARYLAND 6052177 30273 546
21 MASSACHUSETTS 6859819 24560 173
22 MICHIGAN 9962311 44826 569
23 MINNESOTA 5576606 13291 113
24 MISSISSIPPI 2984100 8526 245
25 MISSOURI 6113532 32420 600
26 MONTANA 1050493 3961 41
27 NEBRASKA 1920076 5873 43
28 NEVADA 2998039 16667 274
29 NEW HAMPSHIRE 1342795 2668 14
30 NEW JERSEY 9005644 20604 324
31 NEW MEXICO 2088070 16359 148
32 NEW YORK 19849399 70799 548
33 NORTH CAROLINA 10273419 37364 591
34 NORTH DAKOTA 755393 2125 10
35 OHIO 11658609 34683 710
36 OKLAHOMA 3930864 17934 242
37 OREGON 4142776 11674 104
38 PENNSYLVANIA 12805537 40120 739
39 RHODE ISLAND 1059639 2460 20
40 SOUTH CAROLINA 5024369 25432 390
41 SOUTH DAKOTA 869666 3771 25
42 TENNESSEE 6715984 43755 527
43 TEXAS 28304596 124238 1412
44 UTAH 3101833 7410 73
45 VERMONT 623657 1034 14
46 VIRGINIA 8470020 17632 453
47 WASHINGTON 7405743 22548 230
48 WEST VIRGINIA 1815857 6368 85
49 WISCONSIN 5795483 18539 186
50 WYOMING 579315 1376 15
rape robbery aggravated_assault property_crime burglary larceny_theft
1 2028 4217 18902 144160 31477 99842
2 863 951 4257 26204 4171 17775
3 3581 7440 24207 204515 37627 147830
4 2053 1935 12425 92489 21862 63374
5 14721 56622 104454 987114 176690 642033
6 3858 3838 12721 151483 22813 106809
7 837 2813 4428 63509 8890 47310
8 334 1082 2891 23477 3970 18138
9 7940 18597 58031 527220 88853 395453
10 2718 10044 23793 298298 55374 216661
11 567 1077 1894 40392 5549 29574
12 707 196 2953 28079 5655 20278
13 5556 17567 32060 257497 43459 193157
14 2625 6600 16976 161132 30140 115591
15 1234 1253 6639 66855 15078 46198
16 1627 1785 8458 81593 13931 59816
17 1661 2958 5174 94833 20195 64394
18 1867 5358 18285 157712 34265 112485
19 473 249 872 20133 3334 16006
20 1691 11200 16836 134496 23508 97420
21 2197 4871 17319 98575 17089 73946
22 7031 6488 30738 179318 35641 124104
23 2385 3621 7172 122212 18787 93446
24 1091 2071 5119 81581 24710 52240
25 2729 6351 22740 173253 30081 123251
26 613 295 3012 27225 3615 21018
27 1191 968 3671 43663 6472 31988
28 1890 4841 9662 78322 20049 45461
29 663 419 1572 18555 2574 15066
30 1505 7895 10880 140086 23891 104025
31 1259 3722 11230 82306 17917 52617
32 6324 20108 43819 300555 35002 252143
33 2715 9350 24708 261486 64786 180902
34 399 183 1533 16602 2942 11887
35 5859 11605 16509 282034 58573 203208
36 2142 3000 12550 113066 28608 72207
37 1999 2432 7139 123722 17705 88877
38 4201 11793 23387 211220 32057 166178
39 445 474 1521 18561 3217 13861
40 2504 3870 18668 160575 31306 115012
41 595 238 2913 16317 2707 12227
42 2934 7862 32432 197488 38716 140248
43 14470 32267 76089 725328 134066 523221
44 1697 1468 4172 86238 11817 64892
45 218 92 710 8960 1854 6912
46 2862 4332 9985 151855 18468 123215
47 3255 5390 13673 235027 43720 162511
48 795 524 4964 33630 7628 23000
49 2139 4345 11869 104802 17599 77735
50 263 76 1022 10604 1593 8232
motor_vehicle_theft State
1 12841 ALABAMA
2 4258 ALASKA
3 19058 ARIZONA
4 7253 ARKANSAS
5 168391 CALIFORNIA
6 21861 COLORADO
7 7309 CONNECTICUT
8 1369 DELAWARE
9 42914 FLORIDA
10 26263 GEORGIA
11 5269 HAWAII
12 2146 IDAHO
13 20881 ILLINOIS
14 15401 INDIANA
15 5579 IOWA
16 7846 KANSAS
17 10244 KENTUCKY
18 10962 LOUISIANA
19 793 MAINE
20 13568 MARYLAND
21 7540 MASSACHUSETTS
22 19573 MICHIGAN
23 9979 MINNESOTA
24 4631 MISSISSIPPI
25 19921 MISSOURI
26 2592 MONTANA
27 5203 NEBRASKA
28 12812 NEVADA
29 915 NEW HAMPSHIRE
30 12170 NEW JERSEY
31 11772 NEW MEXICO
32 13410 NEW YORK
33 15798 NORTH CAROLINA
34 1773 NORTH DAKOTA
35 20253 OHIO
36 12251 OKLAHOMA
37 17140 OREGON
38 12985 PENNSYLVANIA
39 1483 RHODE ISLAND
40 14257 SOUTH CAROLINA
41 1383 SOUTH DAKOTA
42 18524 TENNESSEE
43 68041 TEXAS
44 9529 UTAH
45 194 VERMONT
46 10172 VIRGINIA
47 28796 WASHINGTON
48 3002 WEST VIRGINIA
49 9468 WISCONSIN
50 779 WYOMING
finance3 <- finance |>
filter (Year == 2017 )
finance2017 <- finance3 |>
inner_join (x = crime17, y = finance3, by = c ("state" = "State" )) |>
select (1 : 12 , 23 , 24 , 41 ) |>
mutate (
percap_prison = Details.Correction.Correction.Total / population,
percap_edu = Details.Education.Education.Total / population,
percap_police = Details.Police.protection / population,
percap_violent = violent_crime / population
)
State Allocation vs Crime Rate Analysis 2018
crime18 <- read.csv ("data/crime18.csv" )
crime18 |>
mutate (State = if_else (state == "IOWA4" , "IOWA" , state)) |>
mutate (State = if_else (state == "NORTH CAROLINA5" , "NORTH CAROLINA" , state))
state population violent_crime murder_and_nonnegligent_manslaughter
1 ALABAMA 4887871 25399 383
2 ALASKA 737438 6526 47
3 ARIZONA 7171646 34058 369
4 ARKANSAS 3013825 16384 216
5 CALIFORNIA 39557045 176982 1739
6 COLORADO 5695564 22624 210
7 CONNECTICUT 3572665 7411 83
8 DELAWARE 967171 4097 48
9 FLORIDA 21299325 81980 1107
10 GEORGIA 10519475 34355 642
11 HAWAII 1420491 3532 36
12 IDAHO 1754208 3983 35
13 ILLINOIS 12741080 51490 884
14 INDIANA 6691878 25581 438
15 IOWA 3156145 7893 54
16 KANSAS 2911505 12782 113
17 KENTUCKY 4468402 9467 244
18 LOUISIANA 4659978 25049 530
19 MAINE 1338404 1501 24
20 MARYLAND 6042718 28320 490
21 MASSACHUSETTS 6902149 23337 136
22 MICHIGAN 9995915 44918 551
23 MINNESOTA 5611179 12369 106
24 MISSISSIPPI 2986530 6999 171
25 MISSOURI 6126452 30758 607
26 MONTANA 1062305 3974 34
27 NEBRASKA 1929268 5494 44
28 NEVADA 3034392 16420 202
29 NEW HAMPSHIRE 1356458 2349 21
30 NEW JERSEY 8908520 18537 286
31 NEW MEXICO 2095428 17949 167
32 NEW YORK 19542209 68495 562
33 NORTH CAROLINA 10383620 39210 628
34 NORTH DAKOTA 760077 2133 18
35 OHIO 11689442 32723 564
36 OKLAHOMA 3943079 18380 206
37 OREGON 4190713 11966 82
38 PENNSYLVANIA 12807060 39192 784
39 RHODE ISLAND 1057315 2317 16
40 SOUTH CAROLINA 5084127 24825 392
41 SOUTH DAKOTA 882235 3570 12
42 TENNESSEE 6770010 42226 498
43 TEXAS 28701845 117927 1322
44 UTAH 3161105 7368 60
45 VERMONT 626299 1077 10
46 VIRGINIA 8517685 17032 391
47 WASHINGTON 7535591 23472 236
48 WEST VIRGINIA 1805832 5236 67
49 WISCONSIN 5813568 17176 176
50 WYOMING 577737 1226 13
rape robbery aggravated_assault property_crime burglary larceny_theft
1 1996 4076 18944 137700 28841 95747
2 1192 896 4391 24339 3979 16364
3 3638 6523 23528 191974 31532 141303
4 2196 1594 12378 87793 19193 61487
5 15505 54326 105412 941618 164632 621775
6 4070 3797 14547 152163 21371 109119
7 840 2194 4294 60055 7948 44724
8 338 866 2845 22481 3158 17847
9 8438 16884 55551 486017 71933 372919
10 2651 8279 22783 270738 45369 200609
11 625 946 1925 40772 5631 29492
12 791 200 2957 25636 4940 18732
13 5859 14208 30539 246264 39080 187591
14 2370 5939 16834 145838 25268 105242
15 976 932 5931 53385 11127 37571
16 1567 1543 9559 76686 12537 56305
17 1707 2457 5059 87695 17190 60244
18 2085 4568 17866 152661 31132 109993
19 446 228 803 18173 2713 14683
20 1979 9716 16135 122864 18892 91835
21 2410 4143 16648 87196 13862 66728
22 7690 5656 31021 165280 31651 116178
23 2462 2944 6857 111874 16185 85561
24 537 1595 4696 71766 20839 46627
25 2912 5197 22042 162173 27257 115101
26 551 269 3120 26518 3257 20465
27 1233 756 3461 40126 5246 30006
28 2329 3862 10027 73985 17743 44338
29 534 359 1435 16935 1847 14219
30 1424 6364 10463 125156 19232 94887
31 1354 2830 13598 71657 16088 45390
32 6575 18187 43171 281507 31137 237233
33 2633 8423 27526 258979 62290 179057
34 397 158 1560 15507 2724 11008
35 5300 9185 17674 254496 48186 186401
36 2299 2791 13084 113364 26858 73217
37 1975 2549 7360 121278 16304 88418
38 4483 9848 24077 190816 27104 150596
39 481 454 1366 17561 2810 13220
40 2434 3553 18446 153421 29473 109616
41 614 262 2682 15251 2571 11156
42 2821 7190 31717 191279 33132 137708
43 14693 28256 73656 679430 117911 491702
44 1753 1236 4319 75156 9968 57460
45 287 70 710 8036 1467 6316
46 2924 3604 10113 141885 15574 115533
47 3413 5572 14251 222011 40201 154133
48 652 572 3945 26827 5354 18954
49 2248 3489 11263 90686 14099 67953
50 243 100 870 10313 1525 7949
motor_vehicle_theft State
1 13112 ALABAMA
2 3996 ALASKA
3 19139 ARIZONA
4 7113 ARKANSAS
5 155211 CALIFORNIA
6 21673 COLORADO
7 7383 CONNECTICUT
8 1476 DELAWARE
9 41165 FLORIDA
10 24760 GEORGIA
11 5649 HAWAII
12 1964 IDAHO
13 19593 ILLINOIS
14 15328 INDIANA
15 4687 IOWA
16 7844 KANSAS
17 10261 KENTUCKY
18 11536 LOUISIANA
19 777 MAINE
20 12137 MARYLAND
21 6606 MASSACHUSETTS
22 17451 MICHIGAN
23 10128 MINNESOTA
24 4300 MISSISSIPPI
25 19815 MISSOURI
26 2796 MONTANA
27 4874 NEBRASKA
28 11904 NEVADA
29 869 NEW HAMPSHIRE
30 11037 NEW JERSEY
31 10179 NEW MEXICO
32 13137 NEW YORK
33 17632 NORTH CAROLINA
34 1775 NORTH DAKOTA
35 19909 OHIO
36 13289 OKLAHOMA
37 16556 OREGON
38 13116 PENNSYLVANIA
39 1531 RHODE ISLAND
40 14332 SOUTH CAROLINA
41 1524 SOUTH DAKOTA
42 20439 TENNESSEE
43 69817 TEXAS
44 7728 UTAH
45 253 VERMONT
46 10778 VIRGINIA
47 27677 WASHINGTON
48 2519 WEST VIRGINIA
49 8634 WISCONSIN
50 839 WYOMING
finance4 <- finance |>
filter (Year == 2018 )
finance2018 <- finance4 |>
inner_join (x = crime18, y = finance4, by = c ("state" = "State" )) |>
select (1 : 12 , 23 , 24 , 41 ) |>
mutate (
percap_prison = Details.Correction.Correction.Total / population,
percap_edu = Details.Education.Education.Total / population,
percap_police = Details.Police.protection / population,
percap_violent = violent_crime / population
)
State Allocation vs Crime Rate Analysis 2019
crime19 <- read.csv ("data/crime19.csv" )
crime19 |>
mutate (State = if_else (state == "IOWA4" , "IOWA" , state)) |>
mutate (State = if_else (state == "NORTH CAROLINA5" , "NORTH CAROLINA" , state))
state population violent_crime murder_and_nonnegligent_manslaughter
1 ALABAMA 4903185 25046 358
2 ALASKA 731545 6343 69
3 ARIZONA 7278717 33141 365
4 ARKANSAS 3017804 17643 242
5 CALIFORNIA 39512223 174331 1690
6 COLORADO 5758736 21938 218
7 CONNECTICUT 3565287 6546 104
8 DELAWARE 973764 4115 48
9 FLORIDA 21477737 81270 1122
10 GEORGIA 10617423 36170 654
11 HAWAII 1415872 4042 48
12 IDAHO 1787065 4000 35
13 ILLINOIS 12671821 51561 832
14 INDIANA 6732219 24966 377
15 IOWA 3155070 8410 60
16 KANSAS 2913314 11968 105
17 KENTUCKY 4467673 9701 221
18 LOUISIANA 4648794 25537 544
19 MAINE 1344212 1548 20
20 MARYLAND 6045680 27456 542
21 MASSACHUSETTS 6892503 22578 152
22 MICHIGAN 9986857 43686 556
23 MINNESOTA 5639632 13332 117
24 MISSISSIPPI 2976149 8272 332
25 MISSOURI 6137428 30380 568
26 MONTANA 1068778 4328 27
27 NEBRASKA 1934408 5821 45
28 NEVADA 3080156 15210 143
29 NEW HAMPSHIRE 1359711 2074 33
30 NEW JERSEY 8882190 18375 262
31 NEW MEXICO 2096829 17450 181
32 NEW YORK 19453561 69764 558
33 NORTH CAROLINA 10488084 38995 632
34 NORTH DAKOTA 762062 2169 24
35 OHIO 11689100 34269 538
36 OKLAHOMA 3956971 17086 266
37 OREGON 4217737 11995 116
38 PENNSYLVANIA 12801989 39228 669
39 RHODE ISLAND 1059361 2342 25
40 SOUTH CAROLINA 5148714 26323 464
41 SOUTH DAKOTA 884659 3530 17
42 TENNESSEE 6829174 40647 498
43 TEXAS 28995881 121474 1409
44 UTAH 3205958 7553 72
45 VERMONT 623989 1262 11
46 VIRGINIA 8535519 17753 426
47 WASHINGTON 7614893 22377 198
48 WEST VIRGINIA 1792147 5674 78
49 WISCONSIN 5822434 17070 175
50 WYOMING 578759 1258 13
rape robbery aggravated_assault property_crime burglary larceny_theft
1 2068 3941 18679 131133 26079 92477
2 1088 826 4360 21294 3563 15114
3 3662 6410 22704 177638 28699 130788
4 2331 1557 13513 86250 18095 60735
5 14799 52301 105541 921114 152555 626802
6 3872 3663 14185 149189 20064 107012
7 771 1929 3742 50862 6441 38457
8 310 790 2967 21931 2968 17359
9 8456 16217 55475 460846 63396 358402
10 2922 7961 24633 252249 39506 188967
11 765 1131 2098 40228 5340 29634
12 809 155 3001 21793 3927 16295
13 6078 12464 32187 233984 34433 180776
14 2475 5331 16783 132694 21795 97176
15 1164 863 6323 54699 11710 37847
16 1416 1293 9154 67428 9984 50165
17 1572 2161 5747 84769 15443 59130
18 2273 4025 18695 146993 26918 109359
19 516 188 824 16743 2350 13667
20 1913 9203 15798 117901 16862 89780
21 2204 3613 16609 81317 12341 62844
22 7235 5350 30545 158296 28572 111980
23 2448 3149 7618 117236 15927 90092
24 747 1700 5493 70707 18660 46300
25 2917 4959 21936 161946 26414 114460
26 624 205 3472 23440 2887 18176
27 1253 792 3731 39449 4745 29719
28 2161 3286 9620 71525 15510 44755
29 590 313 1138 16442 1717 13832
30 1531 5730 10852 118637 16399 91902
31 1288 2341 13640 65269 14610 41702
32 6583 18068 44555 267155 27600 226851
33 3247 7599 27517 247236 54447 174728
34 437 179 1529 15066 2608 10666
35 5731 8846 19154 240291 43894 177725
36 2268 2369 12183 112587 26577 72632
37 1778 2276 7825 115170 14724 85261
38 4351 9743 24465 179665 23354 143921
39 491 418 1408 16259 2321 12580
40 2460 3294 20105 151389 27461 108953
41 642 195 2676 15667 2646 11265
42 2813 6150 31186 181153 29869 132104
43 14824 28988 76253 693204 113902 501813
44 1822 1125 4534 69546 8871 53937
45 278 71 902 8888 1275 7315
46 2816 3524 10987 140213 13900 116044
47 3332 5147 13700 204224 34540 145282
48 754 378 4464 28376 5891 20066
49 2261 2991 11643 85672 12667 65620
50 324 67 854 9093 1396 6984
motor_vehicle_theft State
1 12577 ALABAMA
2 2617 ALASKA
3 18151 ARIZONA
4 7420 ARKANSAS
5 141757 CALIFORNIA
6 22113 COLORADO
7 5964 CONNECTICUT
8 1604 DELAWARE
9 39048 FLORIDA
10 23776 GEORGIA
11 5254 HAWAII
12 1571 IDAHO
13 18775 ILLINOIS
14 13723 INDIANA
15 5142 IOWA
16 7279 KANSAS
17 10196 KENTUCKY
18 10716 LOUISIANA
19 726 MAINE
20 11259 MARYLAND
21 6132 MASSACHUSETTS
22 17744 MICHIGAN
23 11217 MINNESOTA
24 5747 MISSISSIPPI
25 21072 MISSOURI
26 2377 MONTANA
27 4985 NEBRASKA
28 11260 NEVADA
29 893 NEW HAMPSHIRE
30 10336 NEW JERSEY
31 8957 NEW MEXICO
32 12704 NEW YORK
33 18061 NORTH CAROLINA
34 1792 NORTH DAKOTA
35 18672 OHIO
36 13378 OKLAHOMA
37 15185 OREGON
38 12390 PENNSYLVANIA
39 1358 RHODE ISLAND
40 14975 SOUTH CAROLINA
41 1756 SOUTH DAKOTA
42 19180 TENNESSEE
43 77489 TEXAS
44 6738 UTAH
45 298 VERMONT
46 10269 VIRGINIA
47 24402 WASHINGTON
48 2419 WEST VIRGINIA
49 7385 WISCONSIN
50 713 WYOMING
finance5 <- finance |>
filter (Year == 2019 )
finance2019 <- finance5 |>
inner_join (x = crime19, y = finance5, by = c ("state" = "State" )) |>
select (1 : 12 , 23 , 24 , 41 ) |>
mutate (
percap_prison = Details.Correction.Correction.Total / population,
percap_edu = Details.Education.Education.Total / population,
percap_police = Details.Police.protection / population,
percap_violent = violent_crime / population
)
crime15 <- crime15 |>
select ("state" ,"population" ,"violent_crime" )
colnames (crime15) <- c ("state" ,"population15" ,"violent15" )
crime15 <- crime15 |>
mutate (vrate15 = violent15/ population15) |>
select ("state" ,"vrate15" ,"population15" )
crime16 <- crime16 |>
select ("state" ,"population" ,"violent_crime" )
colnames (crime16) <- c ("state" ,"population16" ,"violent16" )
crime16 <- crime16 |>
mutate (vrate16 = violent16/ population16) |>
select ("state" ,"vrate16" ,"population16" )
crime17 <- crime17 |>
select ("state" ,"population" ,"violent_crime" )
colnames (crime17) <- c ("state" ,"population17" ,"violent17" )
crime17 <- crime17 |>
mutate (vrate17 = violent17/ population17)|>
select ("state" ,"vrate17" ,"population17" )
crime18 <- crime18 |>
select ("state" ,"population" ,"violent_crime" )
colnames (crime18) <- c ("state" ,"population18" ,"violent18" )
crime18 <- crime18 |>
mutate (vrate18 = violent18/ population18) |>
select ("state" ,"vrate18" ,"population18" )
crime19 <- crime19 |>
select ("state" ,"population" ,"violent_crime" )
colnames (crime19) <- c ("state" ,"population19" ,"violent19" )
crime19 <- crime19 |>
mutate (vrate19 = violent19/ population19) |>
select ("state" ,"vrate19" ,"population19" )
finance2 <- finance |>
filter (Year %in% c ("2015" , "2016" ,"2017" , "2018" , "2019" ))
crime_join <- crime15 |>
inner_join (x= crime17,y= crime15, by= "state" )
crime_join <- crime_join |>
inner_join (x= crime18,y= crime_join, by= "state" )
crime_join <- crime_join |>
inner_join (x= crime19,y= crime_join, by= "state" )
crime_join <- crime_join |>
inner_join (x= crime16,y= crime_join, by= "state" )
data2 <- crime_join |>
select ("state" ,"population19" ,"population18" ,"population17" ,"population16" ,
"population15" )
colnames (data2) <- c ("state" ,"2019" ,"2018" ,"2017" ,"2016" ,"2015" )
data2_pivot <- data2 |>
pivot_longer (
cols = c ('2015' ,'2016' ,'2017' ,'2018' ,'2019' ),
names_to = "Year" ,
names_transform = parse_number,
values_to = "Population"
)
crime_join <- crime_join |>
select ("state" ,"vrate19" ,"vrate18" ,"vrate17" ,"vrate16" ,"vrate15" )
colnames (crime_join) <- c ("state" ,"2019" ,"2018" ,"2017" ,"2016" ,"2015" )
crime_pivot <- crime_join |>
pivot_longer (
cols = c ('2015' ,'2016' ,'2017' ,'2018' ,'2019' ),
names_to = "Year" ,
names_transform = parse_number,
values_to = "rate"
)
finance_crime <- crime_pivot |>
inner_join (x= finance2,y= crime_pivot,by= c ("State" = "state" ,"Year" )) |>
select (1 ,2 ,4 ,13 ,14 ,17 ,22 ,31 ,32 )
finance_crime <- finance_crime |>
inner_join (x= data2_pivot,y= finance_crime,by= c ("state" = "State" ,"Year" ))
colnames (finance_crime) <- c ("State" ,"Year" ,"Population" ,"Total_Revenue" , "Correction"
,"Education" , "Health" ,"Welfare" ,"Police" ,"Violent_Rate" )
finance_crime <- finance_crime |>
mutate (
percap_prison = Correction / Population,
percap_edu = Education / Population,
percap_police = Police / Population)
#export finance crim as csv
write_csv (finance_crime, "data/finance_crime.csv" )
Data limitations
----------------------------------------------------------------------------------------
Research question(s)
What is the scale of food loss and waste across the food supply chain, and how has this changed over time and across regions?
How does food waste affect a country’s food insecurity and nutrition and how has this changed over time and across nations?
Data collection and cleaning
Have an initial draft of your data cleaning appendix. Document every step that takes your raw data file(s) and turns it into the analysis-ready data set that you would submit with your final project. Include text narrative describing your data collection (downloading, scraping, surveys, etc) and any additional data curation/cleaning (merging data frames, filtering, transformations of variables, etc). Include code for data curation/cleaning, but not collection.
downloaded data from these two links: https://www.fao.org/faostat/en/#data/FS https://www.fao.org/platform-food-loss-waste/flw-data/en
named dataset1 food_waste.csv and uploaded it to data
named dataset 2 newnutrition.csv and uploaded it to data
import dataset1 and selected necessary columns: country, year, commodity, loss_percentage, activity, food_supply_stage and only taking value with year greater than 2009 (to match it with values from dataset2). Since dataset 2 seems to only have relevant data until 2019, we only included values from 2009-2019.
#| label: clean-food-waste-dataset
food_waste <- read_csv ("data/food_waste.csv" , na = c ("N/A" , "" ))
Rows: 27773 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (15): country, region, cpc_code, commodity, loss_percentage_original, lo...
dbl (3): m49_code, year, loss_percentage
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
select_food_waste <- select (.data= food_waste, country, year, commodity, loss_percentage, activity, food_supply_stage ) |>
arrange (year) |>
subset (year >= 2009 & year < 2020 )
#this is weird.. some values for total loss is greater than 100%.. which isnt right but i think it is bc there are repeats and multiple stages in the food loss process (see select_food_waste)
waste_for_merge <- select_food_waste |>
group_by (country, year, commodity = "Wheat" ) |>
summarize (total_loss_percentage = sum (loss_percentage, na.rm = TRUE ))
`summarise()` has grouped output by 'country', 'year'. You can override using
the `.groups` argument.
import dataset2 and select necessary columns: all columns with one year, after importing the data it seems like the values for the year 2020 and 2021 are mostly NA, thus we decided to remove these years. After this, I pivoted the table so that there could be a row for each year and a column for each nutrition statistic (obesity and anemia rates).
nutrition <- read_csv ("data/newnutrition.csv" , na = c ("N/A" , "" ))
New names:
Rows: 411 Columns: 69
── Column specification
──────────────────────────────────────────────────────── Delimiter: "," chr
(69): ...1, ...2, ...3, 2008-2010...4, 2008-2010...5, 2009...6, 2009...7...
ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
Specify the column types or set `show_col_types = FALSE` to quiet this message.
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `2008-2010` -> `2008-2010...4`
• `2008-2010` -> `2008-2010...5`
• `2009` -> `2009...6`
• `2009` -> `2009...7`
• `2009-2010` -> `2009-2010...8`
• `2009-2010` -> `2009-2010...9`
• `2009-2011` -> `2009-2011...10`
• `2009-2011` -> `2009-2011...11`
• `2010` -> `2010...12`
• `2010` -> `2010...13`
• `2010-2011` -> `2010-2011...14`
• `2010-2011` -> `2010-2011...15`
• `2010-2012` -> `2010-2012...16`
• `2010-2012` -> `2010-2012...17`
• `2011` -> `2011...18`
• `2011` -> `2011...19`
• `2011-2012` -> `2011-2012...20`
• `2011-2012` -> `2011-2012...21`
• `2011-2013` -> `2011-2013...22`
• `2011-2013` -> `2011-2013...23`
• `2012` -> `2012...24`
• `2012` -> `2012...25`
• `2012-2013` -> `2012-2013...26`
• `2012-2013` -> `2012-2013...27`
• `2012-2014` -> `2012-2014...28`
• `2012-2014` -> `2012-2014...29`
• `2013` -> `2013...30`
• `2013` -> `2013...31`
• `2013-2014` -> `2013-2014...32`
• `2013-2014` -> `2013-2014...33`
• `2013-2015` -> `2013-2015...34`
• `2013-2015` -> `2013-2015...35`
• `2014` -> `2014...36`
• `2014` -> `2014...37`
• `2014-2015` -> `2014-2015...38`
• `2014-2015` -> `2014-2015...39`
• `2014-2016` -> `2014-2016...40`
• `2014-2016` -> `2014-2016...41`
• `2015` -> `2015...42`
• `2015` -> `2015...43`
• `2015-2016` -> `2015-2016...44`
• `2015-2016` -> `2015-2016...45`
• `2015-2017` -> `2015-2017...46`
• `2015-2017` -> `2015-2017...47`
• `2016` -> `2016...48`
• `2016` -> `2016...49`
• `2016-2018` -> `2016-2018...50`
• `2016-2018` -> `2016-2018...51`
• `2017` -> `2017...52`
• `2017` -> `2017...53`
• `2017-2019` -> `2017-2019...54`
• `2017-2019` -> `2017-2019...55`
• `2018` -> `2018...56`
• `2018` -> `2018...57`
• `2018-2020` -> `2018-2020...58`
• `2018-2020` -> `2018-2020...59`
• `2019` -> `2019...60`
• `2019` -> `2019...61`
• `2019-2021` -> `2019-2021...62`
• `2019-2021` -> `2019-2021...63`
• `2020` -> `2020...64`
• `2020` -> `2020...65`
• `2020-2022` -> `2020-2022...66`
• `2020-2022` -> `2020-2022...67`
• `2021` -> `2021...68`
• `2021` -> `2021...69`
#selecting columns with single year values and removing row 1
nutrition_subset <- nutrition[, c (1 ,3 ,7 , 13 , 19 , 25 , 31 , 37 , 43 , 49 ,53 , 57 , 61 )] |>
slice (- 1 )
#renaming columns
names (nutrition_subset) <- substr (names (nutrition_subset), 1 , 4 )
names (nutrition_subset)[which (names (nutrition_subset) == "...1" )] <- "country"
names (nutrition_subset)[which (names (nutrition_subset) == "...3" )] <- "nutrition_stat"
#pivot
first_nutrition_pivot <- nutrition_subset|>
pivot_longer (cols = 3 : 13 , names_to = "year" , values_to= "values" )
second_nutrition_pivot <- first_nutrition_pivot |>
pivot_wider (names_from = "nutrition_stat" , values_from = "values" )
Warning: Values from `values` are not uniquely identified; output will contain list-cols.
* Use `values_fn = list` to suppress this warning.
* Use `values_fn = {summary_fun}` to summarise duplicates.
* Use the following dplyr code to identify duplicates.
{data} %>%
dplyr::group_by(country, year, nutrition_stat) %>%
dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
dplyr::filter(n > 1L)
#removing null column
clean_nutrition <- second_nutrition_pivot[,c (1 ,2 ,3 ,4 )]
Data description
Have an initial draft of your data description section. Your data description should be about your analysis-ready data.
What are the observations (rows) and the attributes (columns)?
The observations of the select_food_waste dataset are countries and the attributes for each row include year, commodity, loss_percentage, activity, and food_supply_stage. The observations of the clean_nutrition dataset are countries and the attributes for each row include year, prevalence of anemia among reproductive age, and prevalence of obesity in the adult population.
Why was this dataset created?
These two datasets were created so that the relationship between nutrition and food waste conditions in various countries can be analyzed to see whether there is a correlation.
Who funded the creation of the dataset?
The original data was collected by the Food and Agriculture Organization of the United Nations (FAO) and was meant to provide information on country, region, year, food insecurity values and nutrition values.
What processes might have influenced what data was observed and recorded and what was not?
Because the clean_nutrition dataset involves collecting data on people’s health conditions related to anemia and obesity, the process of getting approval for using people’s medical records or other personal information might cause some of the data to be biased since the dataset would probably not include the data of people who did not allow their information to be used or might have skewed data if the information was gained through self-reporting. As a result of the privacy issue, the observed data is probably not the most accurate of the entire population. Additionally, other more specific individual health data including height, weight, BMI, or diet also is not easily available since it concerns people giving out their private information. For the select_food_waste dataset, the loss_percentage attribute might be a little skewed since the there are missing values and a lot of repeated values. As a result, if a country had a natural disaster or a drought or other event that caused the data to be significantly different in a certain year, then the data could influence the actual average amount of food waste of that country since it doesn’t take into account confounding variables.
What preprocessing was done, and how did the data come to be in the form that you are using?
For the food_waste data, na values were removed and the selected columns of year, commodity, loss percentage, activity, and food supply stage were arranged by country and the years from 2009-2019. The organized data was cleaned to be the select_food_waste dataset. For the nutrition data, na values and unuseful rows were first removed. Then columns were renamed and pivoted so that clean_nutrition could be organized by country and the years from 2009-2019. By organizing both datasets by country and year, they can easily be compared and visualized.
Data limitations
For the select_food_waste dataset, there are a lot of missing values for the loss percentages for commodities for some years. Additionally, there are some repeating values, meaning that the values for certain commodities are reported to cycle through the exact same loss percentages for several years at a time. While this could be a pattern, it does not seem likely that this would be accurate. Besides the missing data for loss percentage, it also does not include a meaningful attribute accounting for whether there were any significant events that affect production, which is an important factor that would influence total average food waste of a country. Although the original data included a cause_of_loss column, there were too many na values to be considered useful for explaining certain changes in the loss percentage. The clean_nutrition data also has missing values for various countries. Since it also collects health data that some people may not be comfortable with sharing, the actual values might be slightly biased.
Exploratory data analysis
Perform an (initial) exploratory data analysis.
# | label: waste_rice_year_scatter
waste_rice <- select_food_waste |>
filter (commodity == "Rice" ) |>
filter (! is.na (activity))
waste_rice |>
group_by (year) |>
mutate (mean_loss = mean (loss_percentage)) |>
ggplot (aes (x = year, y = mean_loss)) +
geom_point () +
labs (
title = "Year vs Mean Loss" ,
y = "Mean loss"
)
# | label: waste_rice_country_scatter
waste_rice2 <- waste_rice |>
filter (country %in% c ('Kenya' , "Uganda" , "Togo" , "Senegal" , "Benin" , "Sierra Leone" ))
ggplot (waste_rice2, aes (x = year, y = loss_percentage)) +
geom_point () +
labs (
title = "Year vs Loss Percentage by Country" ,
y = "Loss Percentage"
) +
facet_wrap (~ country)
# | label: anemia_country_scatter
# Anemia
second_nutrition_pivot1 <- second_nutrition_pivot |>
filter (country %in% c ('Ukraine' , 'Armenia' , 'Angola' , 'Afghanistan' , 'United States of America' ))
second_nutrition_pivot1 <- second_nutrition_pivot1 |>
set_names (names (second_nutrition_pivot1) |> str_replace_all (" " , "_" ))
names (second_nutrition_pivot1)[3 ] <- substr (names (second_nutrition_pivot1)[3 ],
20 , 20 )
second_nutrition_pivot1$ a = as.numeric (as.character (second_nutrition_pivot1$ a))
ggplot (second_nutrition_pivot1, aes (x = year, y = a, color = country)) +
geom_point () +
labs (
title = "Year vs Prevalence of anemia in women of reproductive age (15-49 years)" ,
y = "Prevalence of anemia"
)
# | label: obesity_country_scatter
# Obesity
second_nutrition_pivot2 <- second_nutrition_pivot |>
filter (country %in% c ('Ukraine' , 'Armenia' , 'Angola' , 'Afghanistan' , 'United States of America' )) |>
filter (year < 2017 )
second_nutrition_pivot2 <- second_nutrition_pivot2 |>
set_names (names (second_nutrition_pivot2) |> str_replace_all (" " , "_" ))
names (second_nutrition_pivot2)[4 ] <- substr (names (second_nutrition_pivot2)[4 ], 15 , 21 )
second_nutrition_pivot2$ obesity = as.numeric (as.character (second_nutrition_pivot2$ obesity))
second_nutrition_pivot2 |>
filter (obesity != "NANA" )
# A tibble: 40 × 5
country year Prevalence_of_anemia_among_women_of_reprod…¹ obesity `NA`
<chr> <chr> <list> <dbl> <list>
1 Afghanistan 2009 <chr [1]> 3.8 <NULL>
2 Afghanistan 2010 <chr [1]> 4 <NULL>
3 Afghanistan 2011 <chr [1]> 4.2 <NULL>
4 Afghanistan 2012 <chr [1]> 4.4 <NULL>
5 Afghanistan 2013 <chr [1]> 4.7 <NULL>
6 Afghanistan 2014 <chr [1]> 4.9 <NULL>
7 Afghanistan 2015 <chr [1]> 5.2 <NULL>
8 Afghanistan 2016 <chr [1]> 5.5 <NULL>
9 Angola 2009 <chr [1]> 5.9 <NULL>
10 Angola 2010 <chr [1]> 6.2 <NULL>
# ℹ 30 more rows
# ℹ abbreviated name:
# ¹`Prevalence_of_anemia_among_women_of_reproductive_age_(15-49_years)`
ggplot (second_nutrition_pivot2, aes (x = year, y = obesity, color = country)) +
geom_point () +
labs (
title = "Year vs Prevalence of obesity in the adult population (18+ years)" ,
y = "Prevalence of obesity"
)
Questions for reviewers
List specific questions for your peer reviewers and project mentor to answer in giving you feedback on this phase.
Does the select_food_waste dataset provide sufficient data for us to continue analyzing?
Does the waste_rice dataset seem like it contains sufficient data for us to continue working with?
What other relationships between food waste and anemia/obesity could we explore?