New Functions
Details of functions that should soon be added to the bio3d package.
Input
Functions for reading data from other file types:
read AMBER log/mden files
DONE read.mden()
read AMBER netCDF binary trajectorys
see package RnetCDF
read CHARMM log files
not started
read NAMD log files
not started
read CHARMM crd files
DONE read.crd()
read.crd <- function(file, verbose = TRUE) {
split.string <- function(x) {
x <- substring(x, first, last)
x
nchar(x) == 0 <- as.character(NA)
x
}
trim <- function(s) {
s <- sub("^ +", "", s)
s <- sub(" +$", "", s)
s[(s == "")] <- NA
s
}
atom.format <- c(5, 5, 4, 5, -1, 10,10,10, -1, 4, -1, 4, 10)
atom.names <- c("eleno", "resno", "resid", "elety",
"blank", "x", "y", "z", "blank",
"segid", "blank", "resno2", "b")
widths <- abs(atom.format)
drop.ind <- (atom.format < 0)
st <- c(1, 1 + cumsum(widths))
first <- st
-length(st)[!drop.ind]
last <- cumsum(widths)[!drop.ind]
raw.lines <- readLines(file)
head.ind <- which(substr(raw.lines,1,1)=="*")
head.ind <- c(head.ind, (head.ind[length(head.ind)]+1) )
if(verbose)
cat(raw.lines[head.ind],sep="\n")
atom <- matrix(trim(sapply(raw.lines[-head.ind], split.string)),
byrow = TRUE,
ncol = length(atom.format[!drop.ind]),
dimnames = list(NULL,
atom.names[!drop.ind]) )
output <- list(atom = atom,
xyz = as.numeric(t(atom[, c("x", "y", "z")])),
calpha = as.logical(atom[, "elety"] == "CA"))
return(output)
}
Output
write.dcd
The ability to output a binary dcd file from any coordinate matrix.
started but incomplete
write netcdf
see package RnetCDF
write.crd
Output CHARMM crd coordinate file
Manipulation
convert.pdb
Convert between CHARMM, AMBER, GROMACS and PDB format
# read a PDB file
pdb<-read.pdb("1mx3.pdb")
# convert to CHARMM format
charmm<-convert.pdb(pdb, type="charmm", renumber=T)
# write converted PDB
write.pdb(charmm, file="in.pdb")
Function convert.pdb:
convert.pdb <- function(pdb, type, renumber=FALSE, first.resno=1, first.eleno=1, rm.h=TRUE, rm.wat=FALSE) {
options <- c("pdb","charmm","amber","gromics")
type <- match.arg(tolower(type), options)
cat(paste(" Converting to", type, "format"),"\n")
## residue types from PDB
restype <- unique(pdb$atom[,"resid"])
## different his resids
his <- matrix( c("HIS", "HSD", "HID","HISA",
"HIS", "HSE", "HIE","HISB",
"HIS", "HSP", "HIP","HISH"),
nrow=3, byrow=TRUE,
dimnames = list(c("d","e","b"),
c("pdb","charmm","amber","gromics")) )
standard <- c("MET", "PRO", "LEU", "VAL", "ALA",
"ASP", "GLY", "ARG", "CYS", "THR",
"GLU", "ILE", "LYS", "PHE", "GLN",
"SER", "ASN", "TYR", "TRP", his)
non.standard <- !(restype %in% standard)
if(sum( non.standard ) > 0) {
cat(paste(" Found ",sum( non.standard ),
" non standard residues:"),
restype[non.standard], sep="\n")
}
## Convert HIS resid
type.inds <- (colnames(his) %in% type)
conv.inds <- !(colnames(his) %in% c(type,"pdb"))
his.d.ind <- (pdb$atom[,"resid"] %in% his["d", !type.inds ])
his.e.ind <- (pdb$atom[,"resid"] %in% his["e", conv.inds ])
his.b.ind <- (pdb$atom[,"resid"] %in% his["b", conv.inds ])
pdb$atom[his.d.ind,"resid"] <- his["d", type.inds ]
pdb$atom[his.e.ind,"resid"] <- his["e", type.inds ]
pdb$atom[his.b.ind,"resid"] <- his["b", type.inds ]
## Convert ILE CD elety
if (type=="charmm") {
ile.ind <- colSums( rbind((pdb$atom[,"elety"] %in% "CD1"),
(pdb$atom[,"resid"] %in% "ILE"))) ==2
pdb$atom[ile.ind,"elety"] <- "CD"
pdb$atom[,"chain"]=NA # strip chain
} else {
ile.ind <- colSums( rbind((pdb$atom[,"elety"] %in% "CD"),
(pdb$atom[,"resid"] %in% "ILE"))) ==2
pdb$atom[ile.ind,"elety"] <- "CD1"
}
## Strip hydrogen
if(rm.h) {
h.ind <- which(substr(pdb$atom[,"elety"],1,1) %in% "H")
if(length(h.ind) > 0) {
pdb$atom <- pdb$atom[-h.ind,]
pdb$xyz <- pdb$xyz[-c((h.ind*3)-2, (h.ind*3)-1, (h.ind*3))]
pdb$calpha <- pdb$calpha[-h.ind,]
}
}
## Strip water
if(rm.wat) {
wat <- c("H2O", "OH2", "HOH", "HHO", "OHH", "SOL",
"WAT", "TIP", "TIP2", "TIP3", "TIP4")
wat.ind <- which(pdb$atom[,"resid"] %in% wat)
if(length(wat.ind) > 0) {
pdb$atom <- pdb$atom[-wat.ind,]
pdb$xyz <- pdb$xyz[-c((wat.ind*3)-2, (wat.ind*3)-1, (wat.ind*3))]
pdb$calpha <- pdb$calpha[-wat.ind,]
}
}
## Renumber
if(renumber) {
nums <- as.numeric(pdb$atom[,"resno"])
pdb$atom[,"resno"] <- nums - (nums[1] - first.resno)
pdb$atom[,"eleno"] <- seq(first.eleno, length=nrow(pdb$atom))
}
## (split by chain or segid)
#unique(pdb$atom[,"chain"])
#unique(pdb$atom[,"segid"])
return(pdb)
}
Analysis
torsion analysis
A function for basic phi, psi, omega and chi dihedral anlysis. Should probably call a more general angle function which can also be called for sse angle analysis, hbond analysis, Ramachandran plots etc.
Already have SOLW phi psi analysis with the secondary structure functions stride and dssp (see below). These are a little too slow for trajectory analysis.
hbond analysis
Build on angle and distance functions employing a default distance cutoff of 2.5 angstroms (0.25 nm) and a default angle cutoff of 60 degrees (i.e. bond angle must be 60 degrees or greater). The angle cutoff is rather liberal. Some researchers prefer a more stringent angle cutoff of 120 degrees.
As an example doc Rd file look at possible salt bridges i.e. distances and angles between charged plus/minus residues
rmsf
Got this with sd. Should use rowSums() rather than apply() e.g. for the trj like matrixxyz
rmsf <- sqrt(rowSums(matrix( apply(xyz, 2, sd), ncol=3, byrow=TRUE)^2))
radius of gyration
compute the radius of gyration for selected atoms and the radii of gyration about the x, y and z axes. Should we have an option to mass weight atoms or just take Calphas?. For the moment take the mass from a PQR input file (B col).
pdb <- read.pqr("1L2y.pqr")
xyz <- as.vec3d(pdb$xyz)
mass <- as.numeric(pdb$atom[,"b"])
## distance from centroid
centered <- sweep(xyz, 2, apply(xyz,2,mean))
rg <- sqrt( sum( (apply(centered, 1, vec.norm)^2)*mass)/sum(mass) )
cross-correlation
have this somewhere
Time series analysis
For the moment just use acf()
secondary structure analysis
Add dssp and stride functions (see below) to package. Note the user needs to have installed dssp and stride. I can ship a stride exicuitable but not dssp due to license restrictions. The simpliest way to get stride is to link from a VMD instalation.
Dssp can also be used to produce a crude solvent accesible surface area measure for pdbs or trj frames.
#STRIDE
stride <- function(pdb, exepath = "~/bin/") {
infile <- tempfile()
outfile <- tempfile()
write.pdb(pdb, file=infile)
system( paste(exepath,"stride -f",outfile," ",infile,sep="") )
raw.lines <- readLines(outfile)
type <- substring(raw.lines, 1, 3)
unlink(c(infile, outfile))
raw.loc <- raw.lines[type == "LOC"]
raw.tor <- raw.lines[type == "ASG"]
phi <- as.numeric(substring(raw.tor, 43,49))
psi <- as.numeric(substring(raw.tor, 53,59))
start <- as.numeric(substring(raw.loc, 23,27))
end <- as.numeric(substring(raw.loc, 42,45))
chain <- substring(raw.loc, 29,29)
sse <- substring(raw.loc, 6,9)
h.ind <- sse == "Alph"
g.ind <- sse == "310H"
e.ind <- sse == "Stra"
t.ind <- sse == "Turn"
out <- list(helix = list(start = c(start[h.ind], start[g.ind]),
end = c(end[h.ind], end[g.ind]),
length = ( c(end[h.ind], end[g.ind]) -
c(start[h.ind], start[g.ind]) + 1),
chain = c(chain[h.ind], chain[g.ind]) ),
sheet = list(start = start[e.ind],
end = end[e.ind],
length = (end[e.ind] - start[e.ind] + 1),
chain = chain[e.ind]),
turn = list(start = start[t.ind],
end = end[t.ind],
length =(end[t.ind] - start[t.ind] + 1),
chain = chain[t.ind]),
phi = phi, psi = psi)
}
#DSSP
dssp <- function(pdb, exepath = "~/bin/") {
infile <- tempfile()
outfile <- tempfile()
write.pdb(pdb, file=infile)
system( paste(exepath, "dssp -c ", infile," ", outfile, sep=""), ignore.stderr = TRUE )
raw.lines <- readLines(outfile)
unlink(c(infile, outfile))
type <- substring(raw.lines, 1, 3)
raw.lines <- raw.lines[-(1:which(type==" #"))]
cha <- substring(raw.lines, 12, 12)
sse <- substring(raw.lines, 17, 17)
phi <- substring(raw.lines, 96, 101)
psi <- substring(raw.lines, 102, 107)
h <- bounds(which(sse=="H"))
g <- bounds(which(sse=="G"))
e <- bounds(which(sse=="E"))
t <- bounds(which(sse=="T"))
out <- list(helix = list(start = c(h[,"start"], g[,"start"]),
end = c(h[,"end"], g[,"end"]),
length = c(h[,"length"], g[,"length"]),
chain = c(cha[h[,"start"]], cha[g[,"start"]]) ),
sheet = list(start = e[,"start"],
end = e[,"end"],
length = e[,"length"],
chain = cha[e[,"start"]] ),
turn = list(start = t[,"start"],
end = t[,"end"],
length = t[,"length"],
chain = cha[t[,"start"]] ),
phi = phi, psi = psi)
}
seqaln
Got this already but user needs to install mafft
For methods '"mafft"' and '"clustalx"' a system call is made to the
tool given by 'method', and the respective program must be
installed on your system and be in the search path for
executables.