2024年5月11日发(作者:)

#=================================================

====================================

# 设置工作目录

getwd();

# 把工作目录转换到数据所在的文件

# 载入WGCNA包

library(WGCNA);

# 在读入数据时,遇到字符串后,不要将其转换成因子,仍然保留为字符串格式(重

要)

options(stringsAsFactors = FALSE);

#读入雌性小鼠肝脏数据

femData = ("");

# 快速浏览肝脏数据

dim(femData);

names(femData);

#把数据的前1-8列删除并转置数据,把转置后的数据命名为datExpr0,列的名称为

substanceBXH,即基因名

#行的名称为原数据去掉1-8列后的列名

datExpr0 = (t(femData[, -c(1:8)]));

names(datExpr0) = femData$substanceBXH;

rownames(datExpr0) = names(femData)[-c(1:8)];

#对datExpr0的数据进行goodSamplesGenes测试,看符合标准的基因的返回值

gsg = goodSamplesGenes(datExpr0, verbose = 3);

gsg$allOK

#如果有不合格的基因或数据,则显示出来,并剔除

if (!gsg$allOK)

{

# 显示出要被删除的基因和数据

if (sum(!gsg$goodGenes)>0)

printFlush(paste("Removing

paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")));

if (sum(!gsg$goodSamples)>0)

printFlush(paste("Removing

paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));

# 去掉不合格的基因和数据

datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]

}

#对数据进行聚类

sampleTree = hclust(dist(datExpr0), method = "average");

# 画出聚类树: 打开一个大小为12到9英寸的图形输出窗口

# 如果窗口太大或太小,用户应该更改维度

sizeGrWindow(12,9)

genes:",

samples:",

#pdf(文件 = "Plots/", 宽 = 12, 高 = 9);

par(cex = 0.6);

par(mar = c(0,4,2,0))

plot(sampleTree, main = "Sample clustering to detect

xlab="", = 1.5,

= 1.5, = 2)

# 画一条线切割离群数据

abline(h = 15, col = "red");

outliers", sub="",

# 确定下面的集群

clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)

table(clust)

# 1集群里含有需要的数据,把其命名为keepSamples

keepSamples = (clust==1)

#把dataExpr0中与keepSamples中相同的行定义为datExpr

datExpr = datExpr0[keepSamples, ]

nGenes = ncol(datExpr)

nSamples = nrow(datExpr)

#导入traitData数据

traitData = ("");

dim(traitData)

names(traitData)

# 去掉不需要的性状

allTraits = traitData[, -c(31, 16)];

allTraits = allTraits[, c(2, 11:36) ];

dim(allTraits)

names(allTraits)

# 形成一个类似于表达数据的数据框架,这些数据将保持临床特征

#femaleSamples为datExpr的样本名,traitRows为把老鼠特征性状中与

femaleSample中样本一样的提取

femaleSamples = rownames(datExpr);

traitRows = match(femaleSamples, allTraits$Mice);

datTraits = allTraits[traitRows, -1];

rownames(datTraits) = allTraits[traitRows, 1];

collectGarbage();

# 重新对样本进行聚类

sampleTree2 = hclust(dist(datExpr), method = "average")

# 将特征转换为颜色表示:白色表示低,红色表示高,灰色表示缺少条目

traitColors = numbers2colors(datTraits, signed = FALSE);

# 绘制出树状图和下面的颜色.

plotDendroAndColors(sampleTree2, traitColors,

groupLabels = names(datTraits),

main = "Sample dendrogram and trait heatmap")

#保存数据datExpr, datTraits

save(datExpr, datTraits, file = "")

#=================================================

====================================

# 允许多线程,这有助于加快某些计算的速度。

# 警告:如果您运行RStudio或其他第三方R环境,请跳过这一行。

enableWGCNAThreads()

# 导入第一部分的数据

lnames = load(file = "");

lnames

# 构建一个权重基因网络需要选择邻接矩阵权重参数

#给出候选的β值

powers = c(c(1:10), seq(from = 12, to=20, by=2))

# 调用网络拓扑分析函数

sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)

# 对结果作图

sizeGrWindow(9, 5)

par(mfrow = c(1,2));

cex1 = 0.9;

# 无标度拓扑拟合指数作为软阈值的函数

plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],

xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed

R^2",type="n",

main = paste("Scale independence"));

text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],

labels=powers,cex=cex1,col="red");

# 在图中高度为0.9的位置画线,这个值对应的是相关系数的平方 R^2

abline(h=0.90,col="red")

# 平均连通性是软阈值能力的函数

plot(sft$fitIndices[,1], sft$fitIndices[,5],

xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",

main = paste("Mean connectivity"))

text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

#调用一个函数能够构建基因网络和识别模块

net = blockwiseModules(datExpr, power = 6,

TOMType = "unsigned", minModuleSize = 30,

reassignThreshold = 0, mergeCutHeight = 0.25,

numericLabels = TRUE, pamRespectsDendro = FALSE,

saveTOMs = TRUE,

saveTOMFileBase = "femaleMouseTOM",

verbose = 3)

# 创建一个图形窗口

sizeGrWindow(12, 9)

# 将标签转化为绘图颜色

mergedColors = labels2colors(net$colors)

# 绘制树状图和下面的模块颜色

plotDendroAndColors(net$dendrograms[[1]],

mergedColors[net$blockGenes[[1]]],

"Module colors",

dendroLabels = FALSE, hang = 0.03,

addGuide = TRUE, guideHang = 0.05)

#保存数据

moduleLabels = net$colors

moduleColors = labels2colors(net$colors)

#

MEs = net$MEs;

geneTree = net$dendrograms[[1]];

save(MEs, moduleLabels, moduleColors, geneTree,

file = "")

#=================================================

====================================

# 加载第一部分中保存的表达式和特征数据

lnames = load(file = "");

#变量的lname包含了加载变量的名称。

lnames

# 加载在第二部分中保存的网络数据。

lnames = load(file = "");

lnames

# 定义基因和样本的数量

nGenes = ncol(datExpr);

nSamples = nrow(datExpr);

# 用彩色标签重新计算MEs

MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes

MEs = orderMEs(MEs0)

moduleTraitCor = cor(MEs, datTraits, use = "p");

moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);

sizeGrWindow(10,6)

# 将显示相关性及其p值,

textMatrix = paste(signif(moduleTraitCor, 2), "n(",

signif(moduleTraitPvalue, 1), ")", sep = "");

dim(textMatrix) = dim(moduleTraitCor)

par(mar = c(6, 8.5, 3, 3));

# 在一个热图图中显示相关值

labeledHeatmap(Matrix = moduleTraitCor,

xLabels = names(datTraits),

yLabels = names(MEs),

ySymbols = names(MEs),

colorLabels = FALSE,

colors = greenWhiteRed(50),

textMatrix = textMatrix,

setStdMargins = FALSE,

= 0.5,

zlim = c(-1,1),

main = paste("Module-trait relationships"))

# 定义包含数据特征权重列的变量权重

weight = (datTraits$weight_g);

names(weight) = "weight"

# 模块的名称(颜色)

modNames = substring(names(MEs), 3)

#gene和模块的关系

geneModuleMembership = (cor(datExpr, MEs, use = "p"));

MMPvalue =

(corPvalueStudent((geneModuleMembership), nSamples));

names(geneModuleMembership) = paste("MM", modNames, sep="");

names(MMPvalue) = paste("", modNames, sep="");

#gene和性状的关系

geneTraitSignificance = (cor(datExpr, weight, use = "p"));

GSPvalue = (corPvalueStudent((geneTraitSignificance),

nSamples));

names(geneTraitSignificance) = paste("GS.", names(weight), sep="");

names(GSPvalue) = paste(".", names(weight), sep="");

module = "brown"

column = match(module, modNames);

moduleGenes = moduleColors==module;

sizeGrWindow(7, 7);

par(mfrow = c(1,1));

verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),

abs(geneTraitSignificance[moduleGenes, 1]),

xlab = paste("Module Membership in", module, "module"),

ylab = "Gene significance for body weight",

main = paste("Module membership vs. gene significancen"),

= 1.2, = 1.2, = 1.2, col = module)

names(datExpr)

names(datExpr)[moduleColors=="brown"]

#把与体重相关的模块和基因注释信息关联

annot = (file = "");

dim(annot)

names(annot)

probes = names(datExpr)

probes2annot = match(probes, annot$substanceBXH)

# The following is the number or probes without annotation:

sum((probes2annot))

# Should return 0.

# 现在,创建一个数据框架

geneInfo0 = (substanceBXH = probes,

geneSymbol = annot$gene_symbol[probes2annot],

LocusLinkID = annot$LocusLinkID[probes2annot],

moduleColor = moduleColors,

geneTraitSignificance,

GSPvalue)

# 按权重对体重进行排序

modOrder = order(-abs(cor(MEs, weight, use = "p")));

# 按给定的顺序中添加模块成员信息

for (mod in 1:ncol(geneModuleMembership))

{

oldNames = names(geneInfo0)

geneInfo0

modOrder[mod]],

= (geneInfo0, geneModuleMembership[,

MMPvalue[, modOrder[mod]]);

names(geneInfo0) = c(oldNames, paste("MM.", modNames[modOrder[mod]],

sep=""),

paste(".", modNames[modOrder[mod]], sep=""))

}

# 首先通过模块颜色来排列基因信息变量的基因,然后根据基因的重要性

geneOrder = order(geneInfo0$moduleColor, -abs(geneInfo0$));

geneInfo = geneInfo0[geneOrder, ]

(geneInfo, file = "")