Details of additional tutorials will appear here soon.
The remainder of this page details some common tasks that can be addressed with bio3d:
[Under Construction]
Lets start by using bio3d to simply superpose some PDB structures
library(bio3d)
##- Setup: get all PDB file names from a certain dir 'path'
path <- "/net/home/bgrant/mypdb_dir"
files <- list.files(path=path,
pattern="[pdb]$",
full.names=TRUE)
##- Sequence alignment
seq <- NULL
for(i in 1:length(files)) {
pdb <- read.pdb( files[i] )
seq <- seqbind(seq, aa321(pdb$atom[pdbs$calpha, "resid"]))
}
##* Make sure muscle is in your PATH, see help(seqaln) *##
aln <- seqaln(seq, file="aln.fa", id=basename(files) )
pdbs <- read.fasta.pdb(aln, pdb.path=path, pdbext="")
##- Superpose structures (on all equivalent positions: we will do better later)
xyz <- fit.xyz(pdbs$xyz[1,],pdbs,
pdb.path=path, pdbext ="",
full.pdbs=TRUE)
Example data is included with the Bio3D package and can be loaded with the data(kinesin), data(hivp), and data(lysozyme) commands.
Run demo(bio3d) and example(bio3d).
TO DO Should be able to run demo(bio3d). Put titles on plots and have an interactive "push return for next plot"
library(bio3d)
pdb <- read.pdb(system.file("examples/1bg2.pdb", package="bio3d"))
pdb.summary(pdb)
s <- aa321(pdb$seqres) write.fasta(id="seqres", seq=s , file="eg.fa")
s <- aa321(pdb$atom[pdb$calpha,"resid"]) # Lets append this to the existing FASTA file write.fasta(id="atom", seq=s , file="eg.fa", append = TRUE)
sa <- aa321(pdb$seqres) sb <- aa321(pdb$atom[pdb$calpha,"resid"]) sab <- seqaln(seqbind(sa,sb), file="eg_ali.fa")
pdb <- read.pdb( system.file( "examples/1bg2.pdb", package = "bio3d") ) xyz <- orient.pdb(pdb) write.pdb(pdb, xyz = xyz, file = "mov1.pdb")
pdb <- read.pdb(system.file("examples/1bg2.pdb", package="bio3d"))
k <- dm(pdb, selection="calpha")
plot(k)
aln <- read.fasta(system.file("examples/hivp_xray.fa",package="bio3d"))
h <- entropy(aln)
con <- consensus(aln)
aln <- read.fasta( system.file("examples/kif1a.fa", package="bio3d") )
pdbs <- read.fasta.pdb( aln, pdbext = ".ent", pdb.path = system.file("examples/",package="bio3d") )
a <- dm(pdbs$xyz2,)
b <- dm(pdbs$xyz3,)
ddm <- a - b
plot(ddm,key=FALSE, grid=FALSE)
gaps <- unique(which(is.na(pdbs$xyz) , arr.ind=TRUE),2)
gaps <- gap.inspect(pdbs$xyz)
xyz <- fit.xyz( fixed = pdbs$xyz1,, mobile = pdbs$xyz, fixed.inds = gaps$f.inds, mobile.inds = gaps$f.inds )
rmsd(pdbs$xyz[1,gaps$f.inds], pdbs$xyz[,gaps$f.inds])
rmsd(pdbs$xyz[1,gaps$f.inds], pdbs$xyz[,gaps$f.inds], fit=TRUE)
aln <- read.fasta(system.file("examples/kinesin_xray.fa",package="bio3d"))
pdb.path = system.file("examples/",package="bio3d")
pdbs <- read.fasta.pdb(aln, pdb.path = pdb.path, pdbext = ".ent")
core <- core.find(pdbs, verbose=TRUE) #write.pdbs = TRUE,
plot(core)
xyz <- fit.xyz( fixed = pdbs$xyz1,, mobile = pdbs, fixed.inds = core$c0.5A.xyz, mobile.inds = core$c0.5A.xyz)
# or fit and write output
xyz <- fit.xyz( fixed = pdbs$xyz1,, mobile = pdbs, fixed.inds = core$c0.5A.xyz, mobile.inds = core$c0.5A.xyz, full.pdbs = TRUE, pdb.path = pdb.path, pdbext = ".ent", outpath = "fitlsq/")
cut.seqs <- which(pdbs$id %in% c("d1n6mb_","d1ry6a_"))
# Ignore gap containing positions
gaps.res <- gap.inspect(pdbs$ali[-cut.seqs,])
gaps.pos <- gap.inspect(pdbs$xyz[-cut.seqs,])
#-- Do PCA
pc.xray <- pca.xyz(xyz[-cut.seqs, gaps.pos$f.inds])
# Plot results
plot(pc.xray)
x11()
plot.pca.loadings(pc.xray$U)
a <- mktrj.pca(pc.xray, pc=1, file="pc1.pdb", resno = pdbs$resno[1, gaps.res$f.inds], resid = aa123(pdbs$ali[1, gaps.res$f.inds]) )
trtfile <- system.file("examples/hivp.dcd", package="bio3d")
trj <- read.dcd(trtfile)
# Read the starting PDB file to determine atom correspondence
pdbfile <- system.file("examples/hivp.pdb", package="bio3d")
pdb <- read.pdb(pdbfile)
NetCDF binary trajectory files are supported by the AMBER modules sander, pmemd and ptraj; by the molecular graphics program VMD and by bio3d. Compared to formatted trajectory files, these binary trajectory files are smaller, higher precision and significantly faster to read and write. They are also much nicer to work with that CHARMM format DCD files.
trj <- read.ncdf("some_traj_file.nc")
# Read the starting PDB file to determine atom correspondence
pdb <- read.pdb("some_pdb_file.pdb")
inds <- atom.select(pdb,"///23:31,84:87///CA/")
fit.xyz <- fit.xyz(pdb$xyz, trj, fixed.inds=inds$xyz, mobile.inds=inds$xyz)
write.ncdf(fit.xyz, "new_fit_trj.nc")
r <- rmsd(a=pdb, b=fit.xyz)
pc.trj <- pca.xyz(fit.xyz)
plot(pc.trj)
plot.pca.loadings(pc.trj)
a <- mktrj.pca(pc.trj, pc=1, file="pc1_trj.pdb")
# other examples to include: sdm, clustering etc....
use entropy(), identity() and consensus()
?read.fasta(), seqaln()
use rmsd(), rmsf() etc.
see ?dm
Use the read.fasta() function
aln <- read.fasta("eg.fa")
The 'aln' object is a list with $id and $ali components try the cmd 'attributes(aln)'
Start R and load the bio3d package as described above.
Then use the read.dcd() function to read your trajectory
Note. You can use VMD to write a dcd format trajectory from multi-model PDBs and other common trajectory formats.
| trj<-read.dcd("prot.dcd") |
This will create a trajectory matrix called 'trj' from the file 'prot.dcd' in the current directory.
To find the number of structures in your 'trj' data structure try:
| nrow(trj) |
and to find the number of atoms use
| ncol(trj)/3 |
Note that each row corresponds to a frame from your trajectory and each coloum coresponds to a an atom coordinate. DCD files contain no atom type information but this can be convinently obtained from a coresponding PDB file.
Start R and load the bio3d package as described above.
Then use the read.pdb() function
| pdb.1<-read.pdb("complex_cgbd_3000-5000.pdb") |
| pdb.2<-read.pdb("1HPV.pdb") |
To see whats in 'pdb.1' type
| attributes(pdb.1) |
To see ATOM records
| pdb.1$atom |
To see coords as a numeric vector
| pdb.1$xyz |
Read your PDB
| pdb.1<-read.pdb("complex_cgbd_3000-5000.pdb") |
Change residue names to upper case
| pdb.1$atom[,"resid"]<-toupper(pdb.1$atom[,"resid"]) |
Write out a new PDB
| write.pdb(pdb.1, file="complex_cgbd_3000-5000_tidy.pdb") |
Or write with no chain id
| write.pdb(pdb.1, file="complex_cgbd_3000-5000_tidy.pdb",chain=rep(" ",nrow(pdb.1$atom)) |
to get sequence from 'pdb.1' (rember to change case to Upper)
| seq.1 <- aa321(toupper(pdb$atom[pdb$calpha,"resid"])) |
| seq.2 <- aa321(pdb.2$atom[pdb.2$calpha,"resid"]) |
Or take the sequence from the SEQRES records
| seq.3 <- aa321(pdb$seqres) |
To write a sequence file you need an 'laignment' style object which is nothing more than a list object with an "id" and "ali" components
| write.fasta(list( id=c("complex_cgbd_3000-5000_tidy"),ali=seq.1 ), file="eg.fa") |
| write.fasta(list( id=c("1HPV"),ali=seq.2 ), file="eg.fa", append=TRUE) |
If they are related, and you want to keep track for their correspondances, use the function read.fasta.pdb() and input a sequence alignment of the PDBs
| aln <- read.fasta("eg.fa") |
| pdbs<-read.fasta.pdb(aln) |
The followin is a basic example of how to use scan() to read AMBER mden files produced by Sander with the ntwe option.
To use
To do some basic plotting
Use seqaln() function
Can be taken from PDB file or calculated with stride or dssp
pdb <- read.pdb("~/work/ctbp/stru/1mx3.pdb")
pdb$helix
pdb$sheet # start and end residue numbers
sse <- dssp(pdb)
sse$helix
sse$sheet # note these are residue indices not residue numbers
sse <- stride(pdb) # as above for dssp
pdb <- read.pdb("~/work/ctbp/stru/1mx3.pdb")
sse <- dssp(pdb)
sse.seq <- rep("-", length=sum(pdb$calpha))
h <- unbound(sse$helix$start, sse$helix$end)
e <- unbound(sse$sheet$start, sse$sheet$end)
sse.seq[h]="H"; sse.seq[e]="E"
Page Information
|
Wiki Information |
Recent PBwiki Blog Posts |