Hands-on Workshop
Cosa impareremo oggi:
Approccio:
Scarica il file esercizi: 10_visualizzazioni_avanzate_exercises.R
30 esercizi organizzati in 6 parti:
Dataset: gene expression, clinical data
Composizione di grafici
patchwork è un’estensione ggplot2 che permette di:
Filosofia: usa operatori intuitivi (+, /, |) per costruire layout complessi senza codice verboso
library(patchwork)
library(ggplot2)
# Build the three individual plots reused by the layout examples.

# p1: displacement against fuel economy, coloured by cylinder count
p1 <- ggplot(data = mtcars, mapping = aes(x = mpg, y = disp)) +
  geom_point(mapping = aes(color = factor(cyl))) +
  labs(title = "Displacement vs MPG")

# p2: fuel economy distribution for each cylinder count
p2 <- ggplot(data = mtcars, mapping = aes(x = factor(cyl), y = mpg)) +
  geom_boxplot(mapping = aes(fill = factor(cyl))) +
  labs(title = "MPG by Cylinders")

# p3: weight against fuel economy, point size mapped to horsepower,
# with a linear-model smoother drawn on top
p3 <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point(mapping = aes(size = hp)) +
  geom_smooth(method = "lm") +
  labs(title = "Weight vs MPG")

# Combine the three plots with patchwork's `+`
p1 + p2 + p3
Operatore +
Affianca grafici in griglia automatica
Allineamento
Le plot areas si allineano automaticamente, anche con assi di lunghezze diverse
# Stack the plots vertically with /
p1 / p2 / p3
# Place the plots side by side with |
p1 | p2 | p3
# Mixed operators: p1 and p2 are combined first, p3 goes underneath
(p1 + p2) / p3
# More complex layout
(p1 | p2) / (p3 + plot_spacer() + p3)
Parentesi per Precedenza
Usa () per controllare l’ordine di annidamento. Senza parentesi vale la normale precedenza degli operatori di R: / viene valutato prima di +, e + prima di |; a parità di precedenza la valutazione procede da sinistra a destra
widths/heights
Vettori numerici: proporzioni relative o unità assolute (unit(5, "cm"))
guides = "collect"
Raccoglie tutte le legende in una sola posizione (evita duplicazioni)
Adesso tocca a te!
# Theme used for the patchwork-level annotation: large, bold main title
annotation_theme <- theme(
  plot.title = element_text(size = 16, face = "bold")
)

# Title, subtitle and caption for the whole composition, plus panel tags
# built from a prefix, an upper-case letter and a suffix ("Fig. A:", ...)
panel_annotation <- plot_annotation(
  title = "Analisi Multi-Panel di mtcars",
  subtitle = "Relazioni tra variabili chiave",
  caption = "Dati: mtcars dataset",
  tag_levels = "A",
  tag_prefix = "Fig. ",
  tag_suffix = ":",
  theme = annotation_theme
)

p1 + p2 + p3 + panel_annotation
tag_levels
Opzioni: "A" (maiuscole), "a" (minuscole), "1" (numeri), "I" (romani), "i" (romani minuscoli), o vettore custom
# The `&` operator applies an element to every plot in the composition
p1 + p2 + p3 &
  theme_minimal() &
  theme(legend.position = "bottom")

# Restyle one specific subplot only
p1 + (p2 & theme_dark()) + p3

# Combine with annotations.
# In R `+` binds tighter than `&`: without the outer parentheses
# `theme(...) + plot_annotation(...)` would be evaluated first, i.e. the
# annotation would be added to a theme object instead of to the patchwork.
# Parenthesise the themed composition, then add the annotation to it.
((p1 + p2 + p3) &
  theme_bw() &
  theme(legend.position = "none")) +
  plot_annotation(
    title = "No Legends, Clean Theme"
  )
Operatore & vs +
&: applica l’elemento a tutti i subplot
+: aggiunge layer/elemento solo all’ultimo plot
# Design testuale
# Text design: each letter is one cell, repeated letters span several cells
design <- "
AAB
CCC
"
three_panels <- p1 + p2 + p3
three_panels +
  plot_layout(design = design)

# Design built from area() objects (top/bottom rows, left/right columns)
layout <- c(
  area(t = 1, b = 2, l = 1, r = 2), # p1: rows 1-2, columns 1-2
  area(t = 1, b = 1, l = 3, r = 4), # p2: row 1, columns 3-4
  area(t = 2, b = 2, l = 3, r = 4)  # p3: row 2, columns 3-4
)
three_panels +
  plot_layout(design = layout)
Design Testuale
Ogni lettera = 1 cella. Lettere uguali = stesso plot espanso su più celle. # = spazio vuoto
Adesso tocca a te!
# geom_text_repel() / geom_label_repel() come from ggrepel, which must be
# attached before its first use
library(ggrepel)

# Plain text labels
ggplot(mtcars, aes(wt, mpg, label = rownames(mtcars))) +
  geom_point(aes(color = factor(cyl))) +
  geom_text_repel(size = 3)

# Labels drawn on a background box
ggplot(mtcars, aes(wt, mpg, label = rownames(mtcars))) +
  geom_point(aes(color = factor(cyl))) +
  geom_label_repel(
    size = 3,
    box.padding = 0.5,       # padding around the text
    point.padding = 0.3,     # distance from the point
    segment.color = "grey50" # colour of the connecting line
  )
Quando usare label vs text
geom_label_repel() ha background box → migliore leggibilità su plot complessi
direction
"both" (default): libertà totale
"x": solo orizzontale
"y": solo verticale
Quando limitare
Utile per mantenere allineamento visivo con assi
# Strategy 1: give the label layer its own, filtered data
mtcars_subset <- mtcars[mtcars$mpg > 25, ]
ggplot(mtcars, aes(wt, mpg)) +
  geom_point() +
  geom_text_repel(
    data = mtcars_subset,
    aes(label = rownames(mtcars_subset)),
    color = "red"
  )

# Strategy 2: conditional label column (empty string = no label)
# Work on a copy: adding a character column to `mtcars` itself would leak
# into every later snippet that expects the dataset to be all-numeric
# (e.g. reshaping every column into a single value column).
mtcars_labeled <- mtcars
mtcars_labeled$label <- ifelse(
  mtcars_labeled$mpg > 25,
  rownames(mtcars_labeled),
  ""
)
ggplot(mtcars_labeled, aes(wt, mpg, label = label)) +
  geom_point() +
  geom_text_repel()
Best Practice
Etichetta solo punti rilevanti (outliers, soglie) per plot leggibili
Adesso tocca a te!
Matrice di valori codificati tramite colore
Quando usare:
# Prepare the data in long format
library(dplyr)  # %>%, group_by(), mutate(), ungroup()
library(tibble) # rownames_to_column()
library(tidyr)  # pivot_longer()

heatmap_data <- mtcars %>%
  rownames_to_column("car") %>%
  # Pivot only the numeric measurements, so a stray non-numeric column in
  # `mtcars` cannot break the reshape
  pivot_longer(
    cols = where(is.numeric),
    names_to = "variable",
    values_to = "value"
  ) %>%
  group_by(variable) %>%
  mutate(value_scaled = scale(value)[, 1]) %>% # z-score within each variable
  ungroup() # do not let the grouping leak into later steps

# Heatmap
ggplot(heatmap_data, aes(x = variable, y = car, fill = value_scaled)) +
  # `linewidth` (ggplot2 >= 3.4.0) sets the tile border width; `size` is
  # deprecated for line widths there
  geom_tile(color = "white", linewidth = 0.5) +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0,
    name = "Z-score"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Sequential
Per valori che vanno da basso → alto (es. espressione genica non normalizzata)
Diverging
Per valori con punto centrale significativo (es. fold-change, correlazioni)
# Manual hierarchical clustering
library(dendextend)

# Reshape to a wide matrix: one row per car, one column per variable
heat_wide <- pivot_wider(
  select(heatmap_data, car, variable, value_scaled),
  names_from = variable,
  values_from = value_scaled
)
heat_matrix <- as.matrix(column_to_rownames(heat_wide, "car"))

# Cluster the rows (cars) and the columns (variables)
row_tree <- hclust(dist(heat_matrix))
col_tree <- hclust(dist(t(heat_matrix)))
row_order <- row_tree$order
col_order <- col_tree$order

# Reorder the factor levels so the axes follow the cluster order
car_levels <- rownames(heat_matrix)[row_order]
variable_levels <- colnames(heat_matrix)[col_order]
heatmap_data$car <- factor(heatmap_data$car, levels = car_levels)
heatmap_data$variable <- factor(heatmap_data$variable, levels = variable_levels)

# Plot in cluster order
ggplot(heatmap_data, aes(x = variable, y = car, fill = value_scaled)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red")
Adesso tocca a te!
Visualizzazione differential expression
Regioni:
# Simulate differential-expression (DEG) results
library(dplyr) # %>%, mutate(), case_when()

set.seed(123)
volcano_data <- data.frame(
  gene = paste0("Gene_", 1:1000),
  log2FC = rnorm(1000, mean = 0, sd = 2)
) %>%
  mutate(
    # Tie the p-value to the effect size through a noisy z-score.
    # Uniform p-values drawn independently of the fold change almost never
    # survive the BH correction at 0.05, which would leave the "Significant"
    # class (and every top-gene selection built on it) empty.
    pvalue = 2 * pnorm(-abs(log2FC + rnorm(n(), mean = 0, sd = 1))),
    padj = p.adjust(pvalue, method = "BH"),
    significant = case_when(
      abs(log2FC) > 1 & padj < 0.05 ~ "Significant",
      TRUE ~ "Not Significant"
    )
  )

# Basic volcano plot with fold-change and significance thresholds
ggplot(volcano_data, aes(x = log2FC, y = -log10(padj))) +
  geom_point(aes(color = significant), alpha = 0.6, size = 2) +
  # Named values keep each class on its colour regardless of level order
  scale_color_manual(
    values = c("Not Significant" = "grey70", "Significant" = "red3")
  ) +
  geom_vline(xintercept = c(-1, 1), linetype = "dashed") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  theme_bw()

# Finer classification: direction of change and which threshold is missed
volcano_data <- volcano_data %>%
  mutate(
    regulation = case_when(
      log2FC > 1 & padj < 0.05 ~ "Up-regulated",
      log2FC < -1 & padj < 0.05 ~ "Down-regulated",
      abs(log2FC) > 1 ~ "Not Significant (FC only)",
      padj < 0.05 ~ "Not Significant (p-val only)",
      TRUE ~ "Not Significant"
    ),
    # Fixed level order controls the legend order
    regulation = factor(regulation, levels = c(
      "Up-regulated", "Down-regulated",
      "Not Significant (FC only)", "Not Significant (p-val only)",
      "Not Significant"
    ))
  )

ggplot(volcano_data, aes(x = log2FC, y = -log10(padj), color = regulation)) +
  geom_point(alpha = 0.7, size = 2.5) +
  scale_color_manual(values = c(
    "Up-regulated" = "red3",
    "Down-regulated" = "blue3",
    "Not Significant (FC only)" = "orange",
    "Not Significant (p-val only)" = "purple",
    "Not Significant" = "grey70"
  ))

library(ggrepel)
# Pick the 10 significant genes with the smallest adjusted p-value
significant_genes <- filter(volcano_data, significant == "Significant")
top_genes <- slice_head(arrange(significant_genes, padj), n = 10)

# Volcano plot with the top genes highlighted and labelled
ggplot(data = volcano_data, mapping = aes(x = log2FC, y = -log10(padj))) +
  geom_point(mapping = aes(color = significant), alpha = 0.6) +
  # Larger black points mark the genes that get a label
  geom_point(data = top_genes, color = "black", size = 3) +
  geom_text_repel(
    data = top_genes,
    mapping = aes(label = gene),
    size = 3,
    box.padding = 0.5,
    max.overlaps = Inf
  ) +
  scale_color_manual(values = c("grey70", "red3")) +
  # Dashed guides at the fold-change and significance thresholds
  geom_vline(xintercept = c(-1, 1), linetype = "dashed", alpha = 0.5) +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", alpha = 0.5) +
  theme_minimal()
Adesso tocca a te!
library(patchwork)

# 1. Volcano plot
# Colours are named by category. An unnamed vector is assigned by position
# to the levels actually present in the data, so one empty category would
# shift every remaining colour.
regulation_colors <- c(
  "Up-regulated" = "red3",
  "Down-regulated" = "blue3",
  "Not Significant (FC only)" = "orange",
  "Not Significant (p-val only)" = "purple",
  "Not Significant" = "grey70"
)
p_volcano <- ggplot(volcano_data, aes(x = log2FC, y = -log10(padj))) +
  geom_point(aes(color = regulation), alpha = 0.7) +
  scale_color_manual(values = regulation_colors) +
  theme_minimal() +
  labs(title = "A. Differential Expression")

# 2. Heatmap of the top 20 genes
top20 <- volcano_data %>%
  filter(significant == "Significant") %>%
  arrange(padj) %>%
  slice_head(n = 20) %>%
  mutate(expression = rnorm(n(), mean = log2FC, sd = 0.5)) # simulated expression

# Single-column heatmap: the constant x value "Sample" yields one column
p_heatmap <- ggplot(top20, aes(x = "Sample", y = gene, fill = expression)) +
  geom_tile() +
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0) +
  theme_minimal() +
  labs(title = "B. Top 20 DEGs Expression") +
  theme(axis.title.x = element_blank())

# Combine with patchwork: the volcano gets twice the width of the heatmap
p_volcano + p_heatmap +
  plot_layout(widths = c(2, 1)) +
  plot_annotation(
    title = "Comprehensive Differential Expression Analysis",
    theme = theme(plot.title = element_text(size = 16, face = "bold"))
  )

# Build 4 complementary visualizations
# Panel A: histogram of fold changes, filled by significance class
p1 <- ggplot(data = volcano_data, mapping = aes(x = log2FC)) +
  geom_histogram(mapping = aes(fill = significant), bins = 50) +
  theme_minimal() +
  labs(title = "Fold Change Distribution")

# Panel B: density of -log10 adjusted p-values
p2 <- ggplot(data = volcano_data, mapping = aes(x = -log10(padj))) +
  geom_density(fill = "skyblue", alpha = 0.7) +
  theme_minimal() +
  labs(title = "Significance Distribution")

# Panel C: gene count per regulation category, x labels rotated 45 degrees
p3 <- ggplot(data = volcano_data, mapping = aes(x = regulation)) +
  geom_bar(mapping = aes(fill = regulation)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "DEG Categories")

# Panel D: volcano plot coloured by regulation category
p4 <- ggplot(
  data = volcano_data,
  mapping = aes(x = log2FC, y = -log10(padj), color = regulation)
) +
  geom_point(alpha = 0.7) +
  theme_minimal() +
  labs(title = "Volcano Plot")

# Complex layout: letters map to the plots in the order they are added
design <- "
AAAB
CCDB
"
dashboard_layout <- plot_layout(design = design, guides = "collect")
legend_at_bottom <- theme(legend.position = "bottom")

# `+` is evaluated before `&`, so the theme is applied to every panel
(p1 + p2 + p3 + p4) +
  dashboard_layout &
  legend_at_bottom
Adesso tocca a te!

REVELO Training - Data Viz 2025