Code aus der Vorlesung

Vorlesung vom 26.05.2026

x <- rnorm(1000000, mean = 0, sd = 1)
hist(x, freq = FALSE, breaks = 100)
curve(dnorm, col = "red", add = TRUE)

## Aufgabe 8.16

# (a) P(X>5) = 1 - P(X <= 5) = 1 - F(5)

1-pnorm(5, mean = 0.5, sd = sqrt(4))
[1] 0.01222447
# Alternative:
# 1 - P(X <= 5) = 1 - P((X-0.5)/2 <= (5-0.5)/2)
# = 1 - P(Y <= 2.25) (mit Y ~ N(0,1))
# = 1 - Phi(2.25)

1 - pnorm(2.25)
[1] 0.01222447
# (b) P(|X| < 4) = P(-4 < X < 4) = P(X < 4) - P(X < -4) 
# = F(4) - F(-4)

pnorm(4, mean = 0.5, sd = 2) - pnorm(-4, mean = 0.5, sd = 2)
[1] 0.9477164

Vorlesung vom 21.05.2026

# Wahrscheinlichkeitsfunktion der Binomialverteilung
plot(0:12, dbinom(x = 0:12, size = 12, prob = 0.4), type = "h")

# Verteilungsfunktion der Binomialverteilung
pbinom(0:12, 12, 0.4)
 [1] 0.002176782 0.019591041 0.083443323 0.225337283 0.438178222 0.665208558
 [7] 0.841787707 0.942690079 0.984732733 0.997189816 0.999681233 0.999983223
[13] 1.000000000
# Aufgabe 8.15
# (b)

pbinom(1, 30, 0.15)
[1] 0.0480289
dbinom(0, 30, 0.15) + dbinom(1,30,0.15)
[1] 0.0480289
# 2. Variante
1-pbinom(28,30,0.85)
[1] 0.0480289
# Normalverteilung
pnorm(3,1,2)
[1] 0.8413447
pnorm(1)
[1] 0.8413447

Vorlesung vom 30.04.2026

round(cor(mtcars),2)
       mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
mpg   1.00 -0.85 -0.85 -0.78  0.68 -0.87  0.42  0.66  0.60  0.48 -0.55
cyl  -0.85  1.00  0.90  0.83 -0.70  0.78 -0.59 -0.81 -0.52 -0.49  0.53
disp -0.85  0.90  1.00  0.79 -0.71  0.89 -0.43 -0.71 -0.59 -0.56  0.39
hp   -0.78  0.83  0.79  1.00 -0.45  0.66 -0.71 -0.72 -0.24 -0.13  0.75
drat  0.68 -0.70 -0.71 -0.45  1.00 -0.71  0.09  0.44  0.71  0.70 -0.09
wt   -0.87  0.78  0.89  0.66 -0.71  1.00 -0.17 -0.55 -0.69 -0.58  0.43
qsec  0.42 -0.59 -0.43 -0.71  0.09 -0.17  1.00  0.74 -0.23 -0.21 -0.66
vs    0.66 -0.81 -0.71 -0.72  0.44 -0.55  0.74  1.00  0.17  0.21 -0.57
am    0.60 -0.52 -0.59 -0.24  0.71 -0.69 -0.23  0.17  1.00  0.79  0.06
gear  0.48 -0.49 -0.56 -0.13  0.70 -0.58 -0.21  0.21  0.79  1.00  0.27
carb -0.55  0.53  0.39  0.75 -0.09  0.43 -0.66 -0.57  0.06  0.27  1.00
mtcars$am_factor <- factor(mtcars$am)
levels(mtcars$am_factor) <- c("automatic", "manual")
mtcars$gear_factor <- factor(mtcars$gear)


# Multiple Lineare Regression
model <- lm(mpg ~ wt + cyl  + hp + am_factor + gear_factor, data = mtcars)
summary(model)

Call:
lm(formula = mpg ~ wt + cyl + hp + am_factor + gear_factor, data = mtcars)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.5391 -1.7849 -0.5956  1.2845  5.6237 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     36.58455    3.68080   9.939 3.63e-10 ***
wt              -2.58611    1.00901  -2.563   0.0168 *  
cyl             -0.81901    0.68255  -1.200   0.2414    
hp              -0.02439    0.01749  -1.395   0.1753    
am_factormanual  1.73375    1.82298   0.951   0.3507    
gear_factor4    -0.43510    1.76951  -0.246   0.8078    
gear_factor5    -0.44534    2.42212  -0.184   0.8556    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.604 on 25 degrees of freedom
Multiple R-squared:  0.8494,    Adjusted R-squared:  0.8133 
F-statistic:  23.5 on 6 and 25 DF,  p-value: 3.827e-09
# Kolinearität? -> VIF
library(car)
Loading required package: carData
vif(model)
                GVIF Df GVIF^(1/(2*Df))
wt          4.454727  1        2.110622
cyl         6.791124  1        2.605979
hp          6.569113  1        2.563028
am_factor   3.781727  1        1.944666
gear_factor 7.152087  2        1.635341
plot(x = mtcars$wt,
     y = mtcars$mpg)
abline(model, col = "red")
Warning in abline(model, col = "red"): only using the first two of 7 regression
coefficients

plot(model, which = 1)

plot(model, which = 4)

Vorlesung vom 28.04.2026

# Aufgabe 6.8

library(MSBStatsData)

model <- lm(demand ~ selling_price_eur + product_quality, 
            data = product_demand_testphases)
model

Call:
lm(formula = demand ~ selling_price_eur + product_quality, data = product_demand_testphases)

Coefficients:
      (Intercept)  selling_price_eur    product_quality  
              4.0               -1.5                0.6  
cor(product_demand_testphases) |> round(2)
                  selling_price_eur product_quality demand
selling_price_eur              1.00            0.00  -0.62
product_quality                0.00            1.00   0.76
demand                        -0.62            0.76   1.00
# Variable Inflation Factor (VIF) -> Kolinearität?
# install.packages("car")
library(car)
vif(model)
selling_price_eur   product_quality 
                1                 1 
summary(model)

Call:
lm(formula = demand ~ selling_price_eur + product_quality, data = product_demand_testphases)

Residuals:
         1          2          3          4          5          6 
 5.000e-01 -1.590e-15 -5.000e-01 -5.000e-01  1.035e-16  5.000e-01 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)   
(Intercept)        4.00000    1.37437   2.910  0.06198 . 
selling_price_eur -1.50000    0.28868  -5.196  0.01385 * 
product_quality    0.60000    0.09428   6.364  0.00785 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5774 on 3 degrees of freedom
Multiple R-squared:  0.9574,    Adjusted R-squared:  0.9291 
F-statistic: 33.75 on 2 and 3 DF,  p-value: 0.008778
# -> beide = 1, also unproblematisch

Vorlesung vom 23.04.2026

# Aufgabe 6.4

library(MSBStatsData)

## Regressionsmodell berechnn
study_model <- lm(points ~ study_days, data = exam_study_time)

## Visualisierung
plot(x = exam_study_time$study_days,
     y = exam_study_time$points,
     xlab = "Lerntage",
     ylab = "Klausurpunkte",
     xlim = c(0,22),
     ylim = c(0,50))
abline(study_model, col = "red")

## Vorhersage
koeffizienten <- coef(study_model)
koeffizienten[1] + koeffizienten[2]*21
(Intercept) 
   46.26667 
exam_study_time_new <- data.frame(study_days = c(21, 30, 15, 23))
predict(study_model, newdata = exam_study_time_new)
       1        2        3        4 
46.26667 68.46667 31.46667 51.20000 
## Model summary
summary(study_model)

Call:
lm(formula = points ~ study_days, data = exam_study_time)

Residuals:
      1       2       3       4       5 
 6.0000 -3.9333 -0.7333 -4.2000  2.8667 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)  -5.5333     6.8982  -0.802   0.4811  
study_days    2.4667     0.4651   5.304   0.0131 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.095 on 3 degrees of freedom
Multiple R-squared:  0.9036,    Adjusted R-squared:  0.8715 
F-statistic: 28.13 on 1 and 3 DF,  p-value: 0.01308
## Modelldiagnostik
plot(study_model, which = 1)

plot(study_model, which = 4)

## Anscome Beispiele

### Beispiel Nicht-Linearität

plot(anscombe$x2, anscombe$y2)
ans2 <- lm(y2 ~ x2, data = anscombe)
abline(ans2, col = "red")

plot(ans2, which = 1)

### Beispiel Ausreißer/Influential Point
plot(anscombe$x3, anscombe$y3)
ans3 <- lm(y3 ~ x3, data = anscombe)
abline(ans3, col = "red")

plot(ans3, which = 4)

plot(anscombe$x3, anscombe$y3, 
     pch = as.character(1:nrow(anscombe)))

Vorlesung vom 16.04.2026

library(MSBStatsData)

# Korrelation von zwei Merkmalen
cor(decathlon$race100m, decathlon$longjump)
[1] -0.4838989
# Korrelationsmatrix
cor(decathlon) |> round(2)
                race100m longjump shotput highjump race400m race110mhurdles
race100m            1.00    -0.48   -0.15    -0.12     0.57            0.45
longjump           -0.48     1.00    0.25     0.36    -0.31           -0.38
shotput            -0.15     0.25    1.00     0.16    -0.03           -0.25
highjump           -0.12     0.36    0.16     1.00    -0.11           -0.25
race400m            0.57    -0.31   -0.03    -0.11     1.00            0.38
race110mhurdles     0.45    -0.38   -0.25    -0.25     0.38            1.00
discus             -0.12     0.20    0.72     0.14    -0.04           -0.22
polevault          -0.17     0.27    0.25     0.19    -0.13           -0.29
javelinthrow       -0.06     0.17    0.44     0.07    -0.02           -0.13
race1500m          -0.09     0.02    0.11    -0.01     0.38            0.01
                discus polevault javelinthrow race1500m
race100m         -0.12     -0.17        -0.06     -0.09
longjump          0.20      0.27         0.17      0.02
shotput           0.72      0.25         0.44      0.11
highjump          0.14      0.19         0.07     -0.01
race400m         -0.04     -0.13        -0.02      0.38
race110mhurdles  -0.22     -0.29        -0.13      0.01
discus            1.00      0.27         0.42      0.08
polevault         0.27      1.00         0.19     -0.01
javelinthrow      0.42      0.19         1.00      0.02
race1500m         0.08     -0.01         0.02      1.00
# Visualisierung der Korrelation
# install.packages("corrplot")
library(corrplot)
corrplot 0.95 loaded
corrplot(cor(decathlon))

# Rangkorrelation nach Spearman

library(ggplot2)

plot(x = diamonds$carat,
     y = diamonds$price,
     xlab = "Gewicht in Karat",
     ylab = "Preis in USD")

cor(diamonds$carat, 
    diamonds$price,
    method = "pearson")
[1] 0.9215913
cor(diamonds$carat, 
    diamonds$price,
    method = "spearman")
[1] 0.9628828
cor(diamonds$carat, 
    diamonds$price,
    method = "kendall")
[1] 0.8341049
# Rangkorrelation zwischen cut und color

cor(rank(diamonds$cut),
    rank(diamonds$color),
    method = "spearman")
[1] -0.01718216
cor(rank(diamonds$cut),
    diamonds$price,
    method = "spearman")
[1] -0.09297484
# Beispiel von den Folien

df_rating <- data.frame(
  Bestellung = 1:10,
  Lieferzeit = factor(
    c("sehr kurz", "kurz", "kurz", "mittel", "mittel",
      "lang", "lang", "sehr lang", "sehr lang", "sehr lang"),
    levels = c(
      "sehr kurz", "kurz", "mittel", "lang", "sehr lang"
    ),
    ordered = TRUE
  ),
  Zufriedenheit = factor(
    c("sehr hoch", "hoch", "hoch", "mittel", "mittel",
      "niedrig", "niedrig", "sehr niedrig", "niedrig", "sehr niedrig"),
    levels = c(
      "sehr niedrig", "niedrig", "mittel", "hoch", "sehr hoch"
    ),
    ordered = TRUE
  )
)

Vorlesung vom 14.04.2026

# Aufgabe 4.3
mieten <- c(300,250,400,500,250,600,300,300,450,400)

library(MSBStatsData)
mieten <- cold_rents$monthly_rent_eur

# arithmetische Mittel
mean(mieten)
[1] 375
mean(cold_rents$monthly_rent_eur)
[1] 375
# Stichprobenvarianz, Stichprobenstandardabweichung (Version mit 1/(n-1))
var(mieten)
[1] 13472.22
sd(mieten)
[1] 116.0699
var(mieten) |> sqrt()
[1] 116.0699
# Varianz, Standardabweichung (Version mit 1/n)
n <- length(mieten)
(n-1)/n * var(mieten)
[1] 12125
(n-1)/n * var(mieten) |> sqrt()
[1] 104.4629
# Mittlere absolute Abweichung
(mieten - median(mieten)) |> abs() |> mean()
[1] 95
mean(abs(mieten - median(mieten)))
[1] 95
# Median der absoluten Abweichungen
(mieten - median(mieten)) |> abs() |> median()
[1] 75
# Spannweite
max(mieten) - min(mieten)
[1] 350
range(mieten) |> diff()
[1] 350
# Summary
summary(cold_rents)
 monthly_rent_eur
 Min.   :250.0   
 1st Qu.:300.0   
 Median :350.0   
 Mean   :375.0   
 3rd Qu.:437.5   
 Max.   :600.0   
summary(mieten)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  250.0   300.0   350.0   375.0   437.5   600.0 
summary(mtcars)
      mpg             cyl             disp             hp       
 Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
 1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
 Median :19.20   Median :6.000   Median :196.3   Median :123.0  
 Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
 3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
 Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
      drat             wt             qsec             vs        
 Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
 1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
 Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
 Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
 3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
 Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
       am              gear            carb           am_factor  gear_factor
 Min.   :0.0000   Min.   :3.000   Min.   :1.000   automatic:19   3:15       
 1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000   manual   :13   4:12       
 Median :0.0000   Median :4.000   Median :2.000                  5: 5       
 Mean   :0.4062   Mean   :3.688   Mean   :2.812                             
 3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000                             
 Max.   :1.0000   Max.   :5.000   Max.   :8.000                             
# Korrelationskoeffizient

library(MSBStatsData)
summary(decathlon)
    race100m        longjump        shotput         highjump    
 Min.   :10.22   Min.   :5.920   Min.   : 8.87   Min.   :1.510  
 1st Qu.:11.06   1st Qu.:6.760   1st Qu.:12.25   1st Qu.:1.880  
 Median :11.25   Median :6.960   Median :13.08   Median :1.940  
 Mean   :11.25   Mean   :6.969   Mean   :13.12   Mean   :1.938  
 3rd Qu.:11.44   3rd Qu.:7.180   3rd Qu.:13.96   3rd Qu.:2.000  
 Max.   :12.28   Max.   :8.110   Max.   :17.45   Max.   :2.270  
    race400m     race110mhurdles     discus        polevault    
 Min.   :46.21   Min.   :13.47   Min.   :17.78   Min.   :2.850  
 1st Qu.:49.63   1st Qu.:14.85   1st Qu.:36.80   1st Qu.:4.200  
 Median :50.55   Median :15.21   Median :39.54   Median :4.400  
 Mean   :50.61   Mean   :15.24   Mean   :39.64   Mean   :4.405  
 3rd Qu.:51.51   3rd Qu.:15.61   3rd Qu.:42.38   3rd Qu.:4.650  
 Max.   :56.70   Max.   :17.98   Max.   :54.78   Max.   :5.760  
  javelinthrow     race1500m    
 Min.   :32.20   Min.   :241.0  
 1st Qu.:49.96   1st Qu.:273.0  
 Median :53.70   Median :281.8  
 Mean   :53.98   Mean   :283.5  
 3rd Qu.:57.93   3rd Qu.:291.9  
 Max.   :79.80   Max.   :401.2  
plot(x = decathlon$race100m, 
     y = decathlon$longjump, 
     xlab = "100m in Sekunden",
     ylab = "Weitsprung in Metern")