New Functions

Details of functions that should soon be added to the bio3d package.


Input

Functions for reading data from other file types:

read AMBER log/mden files

DONE read.mden()

read AMBER netCDF binary trajectorys

see package RnetCDF

read CHARMM log files

not started

read NAMD log files

not started

read CHARMM crd files

DONE read.crd()

read.crd <- function(file, verbose = TRUE) {

split.string <- function(x) {
x <- substring(x, first, last)
xnchar(x) == 0 <- as.character(NA)
x
}
trim <- function(s) {
s <- sub("^ +", "", s)
s <- sub(" +$", "", s)
s[(s == "")] <- NA
s
}

atom.format <- c(5, 5, 4, 5, -1, 10,10,10, -1, 4, -1, 4, 10)
atom.names <- c("eleno", "resno", "resid", "elety",
"blank", "x", "y", "z", "blank",
"segid", "blank", "resno2", "b")

widths <- abs(atom.format)
drop.ind <- (atom.format < 0)

st <- c(1, 1 + cumsum(widths))
first <- st-length(st)[!drop.ind]
last <- cumsum(widths)[!drop.ind]

raw.lines <- readLines(file)

head.ind <- which(substr(raw.lines,1,1)=="*")
head.ind <- c(head.ind, (head.ind[length(head.ind)]+1) )

if(verbose)
cat(raw.lines[head.ind],sep="\n")
atom <- matrix(trim(sapply(raw.lines[-head.ind], split.string)),
byrow = TRUE,
ncol = length(atom.format[!drop.ind]),
dimnames = list(NULL,
atom.names[!drop.ind]) )

output <- list(atom = atom,
xyz = as.numeric(t(atom[, c("x", "y", "z")])),
calpha = as.logical(atom[, "elety"] == "CA"))

return(output)
}

Output

write.dcd

The ability to output a binary dcd file from any coordinate matrix.
started but incomplete

write netcdf

see package RnetCDF

write.crd

Output CHARMM crd coordinate file

Manipulation

convert.pdb

Convert between CHARMM, AMBER, GROMACS and PDB format

# read a PDB file
pdb<-read.pdb("1mx3.pdb")
# convert to CHARMM format
charmm<-convert.pdb(pdb, type="charmm", renumber=T)
# write converted PDB
write.pdb(charmm, file="in.pdb")

Function convert.pdb:

convert.pdb <- function(pdb, type, renumber=FALSE, first.resno=1, first.eleno=1, rm.h=TRUE, rm.wat=FALSE) {

options <- c("pdb","charmm","amber","gromics")
type <- match.arg(tolower(type), options)

cat(paste(" Converting to", type, "format"),"\n")

## residue types from PDB
restype <- unique(pdb$atom[,"resid"])

## different his resids
his <- matrix( c("HIS", "HSD", "HID","HISA",
"HIS", "HSE", "HIE","HISB",
"HIS", "HSP", "HIP","HISH"),
nrow=3, byrow=TRUE,
dimnames = list(c("d","e","b"),
c("pdb","charmm","amber","gromics")) )

standard <- c("MET", "PRO", "LEU", "VAL", "ALA",
"ASP", "GLY", "ARG", "CYS", "THR",
"GLU", "ILE", "LYS", "PHE", "GLN",
"SER", "ASN", "TYR", "TRP", his)

non.standard <- !(restype %in% standard)

if(sum( non.standard ) > 0) {
cat(paste(" Found ",sum( non.standard ),
" non standard residues:"),
restype[non.standard], sep="\n")
}

## Convert HIS resid
type.inds <- (colnames(his) %in% type)
conv.inds <- !(colnames(his) %in% c(type,"pdb"))

his.d.ind <- (pdb$atom[,"resid"] %in% his["d", !type.inds ])
his.e.ind <- (pdb$atom[,"resid"] %in% his["e", conv.inds ])
his.b.ind <- (pdb$atom[,"resid"] %in% his["b", conv.inds ])

pdb$atom[his.d.ind,"resid"] <- his["d", type.inds ]
pdb$atom[his.e.ind,"resid"] <- his["e", type.inds ]
pdb$atom[his.b.ind,"resid"] <- his["b", type.inds ]

## Convert ILE CD elety
if (type=="charmm") {
ile.ind <- colSums( rbind((pdb$atom[,"elety"] %in% "CD1"),
(pdb$atom[,"resid"] %in% "ILE"))) ==2
pdb$atom[ile.ind,"elety"] <- "CD"
pdb$atom[,"chain"]=NA # strip chain
} else {
ile.ind <- colSums( rbind((pdb$atom[,"elety"] %in% "CD"),
(pdb$atom[,"resid"] %in% "ILE"))) ==2
pdb$atom[ile.ind,"elety"] <- "CD1"
}

## Strip hydrogen
if(rm.h) {
h.ind <- which(substr(pdb$atom[,"elety"],1,1) %in% "H")
if(length(h.ind) > 0) {
pdb$atom <- pdb$atom[-h.ind,]
pdb$xyz <- pdb$xyz[-c((h.ind*3)-2, (h.ind*3)-1, (h.ind*3))]
pdb$calpha <- pdb$calpha[-h.ind,]
}
}
## Strip water
if(rm.wat) {
wat <- c("H2O", "OH2", "HOH", "HHO", "OHH", "SOL",
"WAT", "TIP", "TIP2", "TIP3", "TIP4")

wat.ind <- which(pdb$atom[,"resid"] %in% wat)
if(length(wat.ind) > 0) {
pdb$atom <- pdb$atom[-wat.ind,]
pdb$xyz <- pdb$xyz[-c((wat.ind*3)-2, (wat.ind*3)-1, (wat.ind*3))]
pdb$calpha <- pdb$calpha[-wat.ind,]
}
}

## Renumber
if(renumber) {
nums <- as.numeric(pdb$atom[,"resno"])
pdb$atom[,"resno"] <- nums - (nums[1] - first.resno)
pdb$atom[,"eleno"] <- seq(first.eleno, length=nrow(pdb$atom))
}

## (split by chain or segid)
#unique(pdb$atom[,"chain"])
#unique(pdb$atom[,"segid"])

return(pdb)
}

Analysis

torsion analysis

A function for basic phi, psi, omega and chi dihedral anlysis. Should probably call a more general angle function which can also be called for sse angle analysis, hbond analysis, Ramachandran plots etc.

Already have SOLW phi psi analysis with the secondary structure functions stride and dssp (see below). These are a little too slow for trajectory analysis.

hbond analysis

Build on angle and distance functions employing a default distance cutoff of 2.5 angstroms (0.25 nm) and a default angle cutoff of 60 degrees (i.e. bond angle must be 60 degrees or greater). The angle cutoff is rather liberal. Some researchers prefer a more stringent angle cutoff of 120 degrees.

As an example doc Rd file look at possible salt bridges i.e. distances and angles between charged plus/minus residues

rmsf

Got this with sd. Should use rowSums() rather than apply() e.g. for the trj like matrixxyz

rmsf <- sqrt(rowSums(matrix( apply(xyz, 2, sd), ncol=3, byrow=TRUE)^2))

radius of gyration

compute the radius of gyration for selected atoms and the radii of gyration about the x, y and z axes. Should we have an option to mass weight atoms or just take Calphas?. For the moment take the mass from a PQR input file (B col).

 pdb <- read.pqr("1L2y.pqr")

 xyz <- as.vec3d(pdb$xyz)
 mass <- as.numeric(pdb$atom[,"b"])
 
 ## distance from centroid
 centered <- sweep(xyz, 2, apply(xyz,2,mean))
 rg <- sqrt( sum( (apply(centered, 1, vec.norm)^2)*mass)/sum(mass) )
 

cross-correlation

have this somewhere

Time series analysis

For the moment just use acf()

secondary structure analysis

Add dssp and stride functions (see below) to package. Note the user needs to have installed dssp and stride. I can ship a stride exicuitable but not dssp due to license restrictions. The simpliest way to get stride is to link from a VMD instalation.
Dssp can also be used to produce a crude solvent accesible surface area measure for pdbs or trj frames.

#STRIDE
stride <- function(pdb, exepath = "~/bin/") {
infile <- tempfile()
outfile <- tempfile()
write.pdb(pdb, file=infile)
system( paste(exepath,"stride -f",outfile," ",infile,sep="") )
raw.lines <- readLines(outfile)
type <- substring(raw.lines, 1, 3)
unlink(c(infile, outfile))

raw.loc <- raw.lines[type == "LOC"]
raw.tor <- raw.lines[type == "ASG"]

phi <- as.numeric(substring(raw.tor, 43,49))
psi <- as.numeric(substring(raw.tor, 53,59))

start <- as.numeric(substring(raw.loc, 23,27))
end <- as.numeric(substring(raw.loc, 42,45))
chain <- substring(raw.loc, 29,29)

sse <- substring(raw.loc, 6,9)

h.ind <- sse == "Alph"
g.ind <- sse == "310H"
e.ind <- sse == "Stra"
t.ind <- sse == "Turn"

out <- list(helix = list(start = c(start[h.ind], start[g.ind]), end = c(end[h.ind], end[g.ind]), length = ( c(end[h.ind], end[g.ind]) - c(start[h.ind], start[g.ind]) + 1), chain = c(chain[h.ind], chain[g.ind]) ),
sheet = list(start = start[e.ind], end = end[e.ind], length = (end[e.ind] - start[e.ind] + 1), chain = chain[e.ind]),
turn = list(start = start[t.ind], end = end[t.ind], length =(end[t.ind] - start[t.ind] + 1), chain = chain[t.ind]),
phi = phi, psi = psi)

}

#DSSP
dssp <- function(pdb, exepath = "~/bin/") {
infile <- tempfile()
outfile <- tempfile()
write.pdb(pdb, file=infile)
system( paste(exepath, "dssp -c ", infile," ", outfile, sep=""), ignore.stderr = TRUE )
raw.lines <- readLines(outfile)
unlink(c(infile, outfile))
type <- substring(raw.lines, 1, 3)
raw.lines <- raw.lines[-(1:which(type==" #"))]

cha <- substring(raw.lines, 12, 12)
sse <- substring(raw.lines, 17, 17)
phi <- substring(raw.lines, 96, 101)
psi <- substring(raw.lines, 102, 107)

h <- bounds(which(sse=="H"))
g <- bounds(which(sse=="G"))
e <- bounds(which(sse=="E"))
t <- bounds(which(sse=="T"))

out <- list(helix = list(start = c(h[,"start"], g[,"start"]), end = c(h[,"end"], g[,"end"]), length = c(h[,"length"], g[,"length"]), chain = c(cha[h[,"start"]], cha[g[,"start"]]) ),
sheet = list(start = e[,"start"], end = e[,"end"], length = e[,"length"], chain = cha[e[,"start"]] ),
turn = list(start = t[,"start"], end = t[,"end"], length = t[,"length"], chain = cha[t[,"start"]] ),
phi = phi, psi = psi)
}

seqaln

Got this already but user needs to install mafft

For methods '"mafft"' and '"clustalx"' a system call is made to the
tool given by 'method', and the respective program must be
installed on your system and be in the search path for
executables.


Page Information

  • 1 year ago [history]
  • View page source
  • You're not logged in
  • Tags: todo

Wiki Information

Recent PBwiki Blog Posts