本レポートでは、MASSパッケージに含まれるbirthwtデータセットを用いて、新生児体重と各種要因との関係を分析します。
library(MASS)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.2
## ── Attaching core tidyverse packages ─────────────────
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ──────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.4.2
##
## Attaching package: 'gtsummary'
##
## The following object is masked from 'package:MASS':
##
## select
library(patchwork)
##
## Attaching package: 'patchwork'
##
## The following object is masked from 'package:MASS':
##
## area
data(birthwt)
df <- birthwt %>%
mutate(race = factor(race, labels = c("White", "Black", "Other")),
smoke = factor(smoke, labels = c("Non-smoker", "Smoker")))
# 年齢と体重の散布図
p1 <- ggplot(df, aes(x = age, y = bwt)) +
geom_point() +
geom_smooth(method = "lm") +
labs(title = "年齢と出生時体重の関係",
x = "母親の年齢", y = "出生時体重 (g)")
# 体重と体重の散布図
p2 <- ggplot(df, aes(x = lwt, y = bwt)) +
geom_point() +
geom_smooth(method = "lm") +
labs(title = "母体重と出生時体重の関係",
x = "母体重 (lbs)", y = "出生時体重 (g)")
# 人種と体重のボックスプロット
p3 <- ggplot(df, aes(x = race, y = bwt, fill = race)) +
geom_boxplot() +
labs(title = "人種と出生時体重の関係",
x = "人種", y = "出生時体重 (g)")
# 喫煙と体重のボックスプロット
p4 <- ggplot(df, aes(x = smoke, y = bwt, fill = smoke)) +
geom_boxplot() +
labs(title = "喫煙状態と出生時体重の関係",
x = "喫煙状態", y = "出生時体重 (g)")
# ptlと体重のボックスプロット
p5 <- ggplot(df, aes(x = factor(ptl), y = bwt)) +
geom_boxplot() +
labs(title = "早産歴と出生時体重の関係",
x = "早産回数", y = "出生時体重 (g)")
# htと体重のボックスプロット
p6 <- ggplot(df, aes(x = factor(ht), y = bwt)) +
geom_boxplot() +
labs(title = "高血圧と出生時体重の関係",
x = "高血圧", y = "出生時体重 (g)")
(p1 + p2) / (p3 + p4) / (p5 + p6)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
人種と年齢の交互作用を含む線形回帰モデルの結果は以下の通りです:
model <- lm(bwt ~ race * age, data = df)
tbl_regression(model)
Characteristic | Beta | 95% CI1 | p-value |
---|---|---|---|
race | |||
White | — | — | |
Black | 1,023 | -347, 2,392 | 0.14 |
Other | 326 | -750, 1,402 | 0.6 |
age | 21 | -4.1, 47 | 0.10 |
race * age | |||
Black * age | -63 | -123, -2.0 | 0.043 |
Other * age | -26 | -72, 20 | 0.3 |
1 CI = Confidence Interval |
人種ごとの年齢と出生時体重の関係を示す散布図と回帰直線は以下の通りです:
ggplot(df, aes(x = age, y = bwt, color = race)) +
geom_point() +
geom_smooth(method = "lm") +
labs(title = "人種別の年齢と出生時体重の関係",
x = "母親の年齢",
y = "出生時体重 (g)")
## `geom_smooth()` using formula = 'y ~ x'
この分析から、以下のような知見が得られました: