# CSVファイルの読み込み
df <- read_csv("datasheet.csv")
## New names:
## Rows: 1510 Columns: 14
## ── Column specification
## ────────────────────────────────────────────────────────────
## Delimiter: "," chr (1): HbA1c dbl (12): ...1, ID, age, sex, race, weight, height,
## smoke, drink, regular_e... date (1): visit_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ Specify
## the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# データ構造の確認
glimpse(df)
## Rows: 1,510
## Columns: 14
## $ ...1 [3m[38;5;246m<dbl>[39m[23m 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16…
## $ ID [3m[38;5;246m<dbl>[39m[23m 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4,…
## $ visit_date [3m[38;5;246m<date>[39m[23m 2020-01-14, 2020-03-19, 2020-07-07, 2020-08-15, 2020…
## $ age [3m[38;5;246m<dbl>[39m[23m 64, 64, 64, 64, 60, 60, 60, 60, 54, 54, 54, 54, 54, 5…
## $ sex [3m[38;5;246m<dbl>[39m[23m 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ race [3m[38;5;246m<dbl>[39m[23m 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ weight [3m[38;5;246m<dbl>[39m[23m 93.1, 93.1, 93.1, 93.1, 68.4, 68.4, 68.4, 68.4, 77.7,…
## $ height [3m[38;5;246m<dbl>[39m[23m 173.3, 173.3, 173.3, 173.3, 168.1, 168.1, 168.1, 168.…
## $ smoke [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ drink [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,…
## $ regular_exercise [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ interv [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,…
## $ HbA1c [3m[38;5;246m<chr>[39m[23m "5.94126651006355", [31mNA[39m, "6.42409394851312", "6.643686…
## $ CRE [3m[38;5;246m<dbl>[39m[23m 0.6574349, 0.1556760, 0.2058530, 0.8931811, 0.5259037…
# 欠損値の確認
colSums(is.na(df))
## ...1 ID visit_date age
## 0 0 0 0
## sex race weight height
## 0 0 0 0
## smoke drink regular_exercise interv
## 0 0 0 0
## HbA1c CRE
## 287 0
# 日付型への変換
df <- df %>%
mutate(
visit_date = ymd(visit_date),
# regular_exerciseを因子型に変換
regular_exercise = factor(regular_exercise),
# sexを因子型に変換
sex = factor(sex, levels = c(1, 2), labels = c("男性", "女性")),
# raceを因子型に変換
race = factor(race, levels = c(1, 2, 3), labels = c("白人", "黒人", "その他")),
# smoke, drinkを因子型に変換
smoke = factor(smoke, levels = c(0, 1), labels = c("非喫煙", "喫煙")),
drink = factor(drink, levels = c(0, 1), labels = c("飲酒なし", "飲酒あり")),
# intervを因子型に変換
interv = factor(interv, levels = c(0, 1), labels = c("対照群", "介入群"))
)
# BMIの計算
df <- df %>%
mutate(bmi = weight / ((height/100)^2))
# クレアチニンクリアランスの計算(Cockcroft-Gault式)
df <- df %>%
mutate(
CrCL = case_when(
sex == "男性" ~ (140 - age) * weight / (72 * CRE),
sex == "女性" ~ (140 - age) * weight / (72 * CRE) * 0.85
)
)
# 患者ごとの最初と最後のvisit_date
df <- df %>%
group_by(ID) %>%
mutate(
first_visit_date = min(visit_date),
last_visit_date = max(visit_date)
) %>%
ungroup()
# HbA1cが7以上の場合のイベント作成
df <- df %>%
mutate(event = ifelse(HbA1c >= 7, 1, 0))
# 初回来院時のHbA1c >= 7または欠測の患者を除外
excluded_ids <- df %>%
group_by(ID) %>%
filter(visit_date == first_visit_date) %>%
filter(HbA1c >= 7 | is.na(HbA1c)) %>%
pull(ID)
df_clean <- df %>%
filter(!ID %in% excluded_ids)
# baselineデータフレーム
baseline <- df_clean %>%
group_by(ID) %>%
filter(visit_date == first_visit_date) %>%
ungroup()
# event_dateデータフレーム
event_date <- df_clean %>%
group_by(ID) %>%
filter(event == 1) %>%
filter(visit_date == min(visit_date)) %>%
select(ID, visit_date, event) %>%
rename(
event_date = visit_date,
status = event
) %>%
ungroup()
# df_mergedデータフレーム
df_merged <- baseline %>%
left_join(event_date, by = "ID") %>%
mutate(
status = replace_na(status, 0),
time = case_when(
status == 0 ~ as.numeric(last_visit_date - first_visit_date),
status == 1 ~ as.numeric(event_date - first_visit_date)
)
)
# 年齢カテゴリーの作成
baseline <- baseline %>%
mutate(
age_cat = cut(age,
breaks = c(-Inf, 40, 60, 80, Inf),
labels = c("40歳以下", "41-60歳", "61-80歳", "81歳以上")),
CrCL_cat = cut(CrCL,
breaks = c(-Inf, 30, 60, Inf),
labels = c("30未満", "30-60", "60以上"))
)
# 背景表の作成
vars <- c("age", "age_cat", "sex", "smoke", "drink", "CrCL", "CrCL_cat",
"bmi", "regular_exercise")
baseline_table <- CreateTableOne(
vars = vars,
strata = "interv",
data = baseline,
test = TRUE
)
print(baseline_table,
nonnormal = c("age", "CrCL", "bmi"),
showAllLevels = TRUE)
## Stratified by interv
## level 対照群 介入群
## n 117 105
## age (median [IQR]) 51.00 [44.00, 57.00] 51.00 [44.00, 56.00]
## age_cat (%) 40歳以下 20 (17.1) 17 (16.2)
## 41-60歳 82 (70.1) 73 (69.5)
## 61-80歳 15 (12.8) 15 (14.3)
## 81歳以上 0 ( 0.0) 0 ( 0.0)
## sex (%) 男性 57 (48.7) 61 (58.1)
## 女性 60 (51.3) 44 (41.9)
## smoke (%) 非喫煙 82 (70.1) 67 (63.8)
## 喫煙 35 (29.9) 38 (36.2)
## drink (%) 飲酒なし 68 (58.1) 65 (61.9)
## 飲酒あり 49 (41.9) 40 (38.1)
## CrCL (median [IQR]) 182.21 [105.64, 257.95] 162.42 [103.45, 291.23]
## CrCL_cat (%) 30未満 0 ( 0.0) 1 ( 1.0)
## 30-60 8 ( 6.8) 4 ( 3.8)
## 60以上 109 (93.2) 100 (95.2)
## bmi (median [IQR]) 25.87 [21.76, 28.84] 23.80 [20.78, 28.37]
## regular_exercise (%) 0 28 (23.9) 28 (26.7)
## 1 32 (27.4) 35 (33.3)
## 2 42 (35.9) 33 (31.4)
## 3 15 (12.8) 9 ( 8.6)
## Stratified by interv
## p test
## n
## age (median [IQR]) 0.926 nonnorm
## age_cat (%) NaN
##
##
##
## sex (%) 0.207
##
## smoke (%) 0.395
##
## drink (%) 0.662
##
## CrCL (median [IQR]) 0.846 nonnorm
## CrCL_cat (%) 0.354
##
##
## bmi (median [IQR]) 0.102 nonnorm
## regular_exercise (%) 0.558
##
##
##
# 生存曲線の作成
fit <- survfit(Surv(time, status) ~ interv, data = df_merged)
# プロット
ggsurvplot(
fit,
data = df_merged,
risk.table = TRUE,
conf.int = TRUE,
xlab = "観察期間(日)",
ylab = "無イベント生存率",
risk.table.height = 0.25,
ggtheme = theme_bw(),
legend.labs = c("対照群", "介入群"),
legend.title = ""
)
# Coxモデルの作成
cox_model <- coxph(
Surv(time, status) ~ interv + age + sex + race + smoke + drink +
CrCL + bmi + regular_exercise + interv:regular_exercise,
data = df_merged
)
# 結果の表示
tbl_regression(
cox_model,
exponentiate = TRUE
) %>%
add_global_p()
Characteristic | HR | 95% CI | p-value |
---|---|---|---|
interv | <0.001 | ||
対照群 | — | — | |
介入群 | 0.14 | 0.05, 0.39 | |
age | 0.99 | 0.97, 1.02 | 0.6 |
sex | 0.6 | ||
男性 | — | — | |
女性 | 0.87 | 0.54, 1.42 | |
race | 0.7 | ||
白人 | — | — | |
黒人 | 0.78 | 0.44, 1.40 | |
その他 | 0.91 | 0.49, 1.69 | |
smoke | 0.8 | ||
非喫煙 | — | — | |
喫煙 | 0.94 | 0.55, 1.62 | |
drink | 0.4 | ||
飲酒なし | — | — | |
飲酒あり | 0.82 | 0.51, 1.33 | |
CrCL | 1.00 | 1.00, 1.00 | 0.5 |
bmi | 1.02 | 0.98, 1.07 | 0.4 |
regular_exercise | 0.7 | ||
0 | — | — | |
1 | 1.35 | 0.66, 2.78 | |
2 | 0.98 | 0.49, 1.97 | |
3 | 0.90 | 0.40, 2.03 | |
interv * regular_exercise | 0.11 | ||
介入群 * 1 | 0.11 | 0.01, 1.09 | |
介入群 * 2 | 0.48 | 0.10, 2.41 | |
介入群 * 3 | 1.29 | 0.20, 8.30 | |
Abbreviations: CI = Confidence Interval, HR = Hazard Ratio |
# regular_exerciseごとのカプランマイヤー曲線
fit_subgroup <- survfit(Surv(time, status) ~ interv + regular_exercise,
data = df_merged)
# プロット
ggsurvplot(
fit_subgroup,
data = df_merged,
risk.table = TRUE,
conf.int = TRUE,
xlab = "観察期間(日)",
ylab = "無イベント生存率",
risk.table.height = 0.25,
ggtheme = theme_bw(),
facet.by = "regular_exercise",
legend.labs = c("対照群", "介入群"),
legend.title = ""
)
## Warning in (function (survsummary, times, survtable = c("cumevents",
## "risk.table", : The length of legend.labs should be 8