2024年5月11日发(作者:)
#=================================================
====================================
# 设置工作目录
getwd();
# 把工作目录转换到数据所在的文件
# 载入WGCNA包
library(WGCNA);
# 在读入数据时,遇到字符串后,不要将其转换成因子,仍然保留为字符串格式(重
要)
options(stringsAsFactors = FALSE);
#读入雌性小鼠肝脏数据
femData = ("");
# 快速浏览肝脏数据
dim(femData);
names(femData);
#把数据的前1-8列删除并转置数据,把转置后的数据命名为datExpr0,列的名称为
substanceBXH,即基因名
#行的名称为原数据去掉1-8列后的列名
datExpr0 = (t(femData[, -c(1:8)]));
names(datExpr0) = femData$substanceBXH;
rownames(datExpr0) = names(femData)[-c(1:8)];
#对datExpr0的数据进行goodSamplesGenes测试,看符合标准的基因的返回值
gsg = goodSamplesGenes(datExpr0, verbose = 3);
gsg$allOK
#如果有不合格的基因或数据,则显示出来,并剔除
if (!gsg$allOK)
{
# 显示出要被删除的基因和数据
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing
paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing
paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
# 去掉不合格的基因和数据
datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
}
#对数据进行聚类
sampleTree = hclust(dist(datExpr0), method = "average");
# 画出聚类树: 打开一个大小为12到9英寸的图形输出窗口
# 如果窗口太大或太小,用户应该更改维度
sizeGrWindow(12,9)
genes:",
samples:",
#pdf(文件 = "Plots/", 宽 = 12, 高 = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect
xlab="", = 1.5,
= 1.5, = 2)
# 画一条线切割离群数据
abline(h = 15, col = "red");
outliers", sub="",
# 确定下面的集群
clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)
# 1集群里含有需要的数据,把其命名为keepSamples
keepSamples = (clust==1)
#把dataExpr0中与keepSamples中相同的行定义为datExpr
datExpr = datExpr0[keepSamples, ]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
#导入traitData数据
traitData = ("");
dim(traitData)
names(traitData)
# 去掉不需要的性状
allTraits = traitData[, -c(31, 16)];
allTraits = allTraits[, c(2, 11:36) ];
dim(allTraits)
names(allTraits)
# 形成一个类似于表达数据的数据框架,这些数据将保持临床特征
#femaleSamples为datExpr的样本名,traitRows为把老鼠特征性状中与
femaleSample中样本一样的提取
femaleSamples = rownames(datExpr);
traitRows = match(femaleSamples, allTraits$Mice);
datTraits = allTraits[traitRows, -1];
rownames(datTraits) = allTraits[traitRows, 1];
collectGarbage();
# 重新对样本进行聚类
sampleTree2 = hclust(dist(datExpr), method = "average")
# 将特征转换为颜色表示:白色表示低,红色表示高,灰色表示缺少条目
traitColors = numbers2colors(datTraits, signed = FALSE);
# 绘制出树状图和下面的颜色.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")
#保存数据datExpr, datTraits
save(datExpr, datTraits, file = "")
#=================================================
====================================
# 允许多线程,这有助于加快某些计算的速度。
# 警告:如果您运行RStudio或其他第三方R环境,请跳过这一行。
enableWGCNAThreads()
# 导入第一部分的数据
lnames = load(file = "");
lnames
# 构建一个权重基因网络需要选择邻接矩阵权重参数
#给出候选的β值
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# 调用网络拓扑分析函数
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# 对结果作图
sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
# 无标度拓扑拟合指数作为软阈值的函数
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed
R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# 在图中高度为0.9的位置画线,这个值对应的是相关系数的平方 R^2
abline(h=0.90,col="red")
# 平均连通性是软阈值能力的函数
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
#调用一个函数能够构建基因网络和识别模块
net = blockwiseModules(datExpr, power = 6,
TOMType = "unsigned", minModuleSize = 30,
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = "femaleMouseTOM",
verbose = 3)
# 创建一个图形窗口
sizeGrWindow(12, 9)
# 将标签转化为绘图颜色
mergedColors = labels2colors(net$colors)
# 绘制树状图和下面的模块颜色
plotDendroAndColors(net$dendrograms[[1]],
mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
#保存数据
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
#
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree,
file = "")
#=================================================
====================================
# 加载第一部分中保存的表达式和特征数据
lnames = load(file = "");
#变量的lname包含了加载变量的名称。
lnames
# 加载在第二部分中保存的网络数据。
lnames = load(file = "");
lnames
# 定义基因和样本的数量
nGenes = ncol(datExpr);
nSamples = nrow(datExpr);
# 用彩色标签重新计算MEs
MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
sizeGrWindow(10,6)
# 将显示相关性及其p值,
textMatrix = paste(signif(moduleTraitCor, 2), "n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(6, 8.5, 3, 3));
# 在一个热图图中显示相关值
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(datTraits),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = greenWhiteRed(50),
textMatrix = textMatrix,
setStdMargins = FALSE,
= 0.5,
zlim = c(-1,1),
main = paste("Module-trait relationships"))
# 定义包含数据特征权重列的变量权重
weight = (datTraits$weight_g);
names(weight) = "weight"
# 模块的名称(颜色)
modNames = substring(names(MEs), 3)
#gene和模块的关系
geneModuleMembership = (cor(datExpr, MEs, use = "p"));
MMPvalue =
(corPvalueStudent((geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("", modNames, sep="");
#gene和性状的关系
geneTraitSignificance = (cor(datExpr, weight, use = "p"));
GSPvalue = (corPvalueStudent((geneTraitSignificance),
nSamples));
names(geneTraitSignificance) = paste("GS.", names(weight), sep="");
names(GSPvalue) = paste(".", names(weight), sep="");
module = "brown"
column = match(module, modNames);
moduleGenes = moduleColors==module;
sizeGrWindow(7, 7);
par(mfrow = c(1,1));
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
abs(geneTraitSignificance[moduleGenes, 1]),
xlab = paste("Module Membership in", module, "module"),
ylab = "Gene significance for body weight",
main = paste("Module membership vs. gene significancen"),
= 1.2, = 1.2, = 1.2, col = module)
names(datExpr)
names(datExpr)[moduleColors=="brown"]
#把与体重相关的模块和基因注释信息关联
annot = (file = "");
dim(annot)
names(annot)
probes = names(datExpr)
probes2annot = match(probes, annot$substanceBXH)
# The following is the number or probes without annotation:
sum((probes2annot))
# Should return 0.
# 现在,创建一个数据框架
geneInfo0 = (substanceBXH = probes,
geneSymbol = annot$gene_symbol[probes2annot],
LocusLinkID = annot$LocusLinkID[probes2annot],
moduleColor = moduleColors,
geneTraitSignificance,
GSPvalue)
# 按权重对体重进行排序
modOrder = order(-abs(cor(MEs, weight, use = "p")));
# 按给定的顺序中添加模块成员信息
for (mod in 1:ncol(geneModuleMembership))
{
oldNames = names(geneInfo0)
geneInfo0
modOrder[mod]],
= (geneInfo0, geneModuleMembership[,
MMPvalue[, modOrder[mod]]);
names(geneInfo0) = c(oldNames, paste("MM.", modNames[modOrder[mod]],
sep=""),
paste(".", modNames[modOrder[mod]], sep=""))
}
# 首先通过模块颜色来排列基因信息变量的基因,然后根据基因的重要性
geneOrder = order(geneInfo0$moduleColor, -abs(geneInfo0$));
geneInfo = geneInfo0[geneOrder, ]
(geneInfo, file = "")
发布评论