R: множественная / логистическая регрессия с тремя переменными — как провести статистическую проверку?

Мне нужна помощь с кодом для статистической проверки. Я пытаюсь изучить взаимосвязь между возрастом, политической принадлежностью и взглядами на легализацию каннабиса. Набор данных — опрос British Social Attitudes (Британское исследование социальных установок) 2010 года.

# Load the survey extract directly from the shared CSV file.
bsa_2010 <- read.csv(
  file = "https://dl.dropboxusercontent.com/s/ubl9huokroj9jw8/bsa%202010.csv"
)
> dput(head(bsa_2010))
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("England", 
"Scotland", "Wales"), class = "factor"), RSex = structure(c(1L, 
1L, 2L, 2L, 2L, 1L), .Label = c("Female", "Male"), class = "factor"), 
    RAge = c(75L, 34L, 81L, 25L, 33L, 76L), MarStat = structure(c(4L, 
    4L, 2L, 3L, 3L, 5L), .Label = c("Living as married", "Married", 
    "Not married", "Separated or divorced after marrying", "Widowed"
    ), class = "factor"), ChildHh = structure(c(1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("No", "Yes"), class = "factor"), WhPaper = structure(c(8L, 
    8L, 8L, 11L, 12L, 8L), .Label = c("(Scottish) Daily Express", 
    "(Scottish) Daily Mail", "Daily Mirror/ Scottish Mirror", 
    "Daily Record", "Daily Star", "Daily Telegraph", "Financial Times", 
    "Skip,not read paper normally", "The Guardian", "The Independent", 
    "The Sun/ Scottish Sun", "The Times"), class = "factor"), 
    PartyIDN = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front", 
    "Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat", 
    "None", "Other answer (WRITE IN)", "Other party (WRITE IN)", 
    "Plaid Cymru", "Refused to say", "Scottish National Party", 
    "UK Independence Party (UKIP)/Veritas"), class = "factor"), 
    Partyid1 = structure(c(2L, 7L, 2L, 6L, 2L, 6L), .Label = c("British National Party (BNP)/ National Front", 
    "Conservative", "Don't know", "Green Party", "Labour", "Liberal Democrat", 
    "None", "Other answer", "Other party", "Plaid Cymru", "Refusal", 
    "Scottish National Party", "UK Independence Party (UKIP)/Veritas"
    ), class = "factor"), PartyId2 = structure(c(1L, 5L, 1L, 
    4L, 1L, 4L), .Label = c("Conservative", "Green Party", "Labour", 
    "Liberal Democrat", "None", "Other party", "Other/DK/Ref"
    ), class = "factor"), Spend1 = structure(c(3L, 4L, 4L, 3L, 
    3L, 4L), .Label = c("(None of these)", "Defence", "Education", 
    "Health", "Help for industry", "Housing", "Overseas aid", 
    "Police and prisons", "Public transport", "Roads", "Social security benefits"
    ), class = "factor"), Spend2 = structure(c(6L, 3L, 2L, 4L, 
    9L, 10L), .Label = c("(None of these)", "Defence", "Education", 
    "Health", "Help for industry", "Housing", "Overseas aid", 
    "Police and prisons", "Public transport", "Roads", "Skip,no 1st priority", 
    "Social security benefits"), class = "factor"), RClassGp = structure(c(4L, 
    6L, 1L, 2L, 4L, 6L), .Label = c("Employers in small org; own account workers", 
    "Intermediate occupations", "Lower supervisory & technical occupations", 
    "Managerial & professional occups", "Not classifiable", "Semi-routine & routine occupations", 
    "Skip, never had a job+DK+NA last job"), class = "factor"), 
    RNSSECG = structure(c(4L, 8L, 9L, 3L, 4L, 8L), .Label = c("1.1", 
    "1.2", "Intermediate occupations", "Lower managerial and professional occupations", 
    "Lower supervisory & technical occupations", "Not classified", 
    "Routine occupations", "Semi-routine Occupations", "Small employers and own account workers"
    ), class = "factor"), CanLegal = structure(c(1L, 1L, 1L, 
    2L, 2L, 1L), .Label = c("Taking cannabis should remain illegal", 
    "should be legal, only licenced shops"), class = "factor"), 
    RaceOri3 = structure(c(10L, 10L, 10L, 10L, 10L, 10L), .Label = c("ASIAN: of Bangladeshi origin", 
    "ASIAN: of Chinese origin", "ASIAN: of Indian origin", "ASIAN: of Pakistani origin", 
    "ASIAN: of other origin (WRITE IN)", "BLACK: of African origin", 
    "BLACK: of Caribbean origin", "MIXED ORIGIN (WRITE IN)", 
    "OTHER (WRITE IN)", "WHITE: of any origin"), class = "factor"), 
    Agecat1 = structure(c(6L, 2L, 7L, 1L, 2L, 6L), .Label = c("(18,28]", 
    "(28,38]", "(38,48]", "(48,58]", "(58,68]", "(68,78]", "(78,88]", 
    "(88,98]"), class = "factor"), Agecat2 = structure(c(3L, 
    1L, 4L, 1L, 1L, 3L), .Label = c("(18,38]", "(38,58]", "(58,78]", 
    "(78,98]"), class = "factor")), .Names = c("Country", "RSex", 
"RAge", "MarStat", "ChildHh", "WhPaper", "PartyIDN", "Partyid1", 
"PartyId2", "Spend1", "Spend2", "RClassGp", "RNSSECG", "CanLegal", 
"RaceOri3", "Agecat1", "Agecat2"), row.names = c(NA, 6L), class = "data.frame")

Используемые переменные: RAge (возраст), PartyIDN (политическая партия, с которой респондент себя отождествляет), CanLegal (отношение к легализации каннабиса).

Чтобы упростить задачу, я разбил возраст на категории и оставил только две крупнейшие политические партии.

# Bin respondent age into ten-year bands. cut() intervals are right-closed by
# default, so without `include.lowest = TRUE` an age of exactly 18 (the lowest
# break) would fall outside every interval and become NA.
bsa_2010$Agecat1 <- cut(
  bsa_2010$RAge,
  breaks = c(18, 28, 38, 48, 58, 68, 78, 88, 98),
  include.lowest = TRUE
)

# Keep only respondents identifying with one of the two largest parties.
Parties <- subset(bsa_2010, PartyIDN %in% c("Conservative", "Labour"))

# Rebuild the factor so that the party levels no longer present are dropped.
Parties$PartyIDN <- factor(Parties$PartyIDN)

Как провести статистическую проверку, чтобы получить p-значения для связи между возрастом, политической принадлежностью и отношением к легализации каннабиса?

Любая помощь будет приветствоваться, спасибо!

1 ответ

Решение

Похоже, вам нужна помощь и с основами, поэтому по статистической части я бы посмотрел https://stats.idre.ucla.edu/r/dae/logit-regression/, а часть, касающаяся R, проста (я буду рассматривать ваши новые возрастные категории как фактор, а не как непрерывную переменную)...

# Bin respondent age into ten-year bands. cut() intervals are right-closed by
# default, so without `include.lowest = TRUE` an age of exactly 18 (the lowest
# break) would become NA and the row would be dropped from the model.
bsa_2010$Agecat1 <- cut(
  bsa_2010$RAge,
  breaks = c(18, 28, 38, 48, 58, 68, 78, 88, 98),
  include.lowest = TRUE
)

# Restrict the data to Conservative and Labour identifiers, then rebuild the
# party factor so the unused levels are dropped.
Parties <- subset(bsa_2010, PartyIDN %in% c("Conservative", "Labour"))
Parties$PartyIDN <- factor(Parties$PartyIDN)

# Inspect the structure of the reduced data set.
str(Parties)

# Drop age bands that have no observations left after subsetting.
Parties$Agecat1 <- factor(Parties$Agecat1)

# Logistic regression of the legalisation view on age band (as a factor)
# and party identification.
firstattempt <- glm(
  formula = CanLegal ~ Agecat1 + PartyIDN,
  family = "binomial",
  data = Parties
)
summary(firstattempt)

Кстати, разбиение возраста на категории не делает проще ни команды R, ни статистику. С тем же успехом можно сделать так:

# Logistic regression of the legalisation view on age, kept as a continuous
# predictor, and party identification.
secondattempt <- glm(
  formula = CanLegal ~ RAge + PartyIDN,
  family = "binomial",
  data = Parties
)
summary(secondattempt)

Так данные даже проще интерпретировать; они подтверждают гипотезу о том, что возраст влияет на результат, а политическая партия — нет. В этом можно убедиться с помощью очень простых графиков:

# Mosaic plot of the legalisation view cross-tabulated with party identification.
mosaicplot(xtabs(~CanLegal + PartyIDN, data = Parties))
# Mosaic plot of the legalisation view cross-tabulated with the binned age factor.
mosaicplot(xtabs(~CanLegal + Agecat1, data = Parties))
Другие вопросы по тегам