cdkbook

PubChemDownloadProperties.groovy

Source code:

@Grab(group='org.openscience.cdk', module='cdk-bundle', version='2.9')

import java.net.UnknownHostException;
import org.openscience.cdk.*
import org.openscience.cdk.io.*
import org.openscience.cdk.silent.*


// Download the PubChem XML record for one compound and print every
// property that the CDK reader attached to the resulting molecule.
cid = 5282253
try {
  // withInputStream closes the HTTP stream once the closure finishes,
  // whether it returns normally or throws; the stream was previously
  // opened with newInputStream() and never closed.
  new URL(
    "https://pubchem.ncbi.nlm.nih.gov/summary/summary.cgi?cid=$cid&disopt=SaveXML"
  ).withInputStream { stream ->
    reader = new PCCompoundXMLReader(stream)
    mol = reader.read(new AtomContainer())
    mol.properties.each {
      // Each entry is rendered through its default string form.
      line = "" + it
      // Replace every backslash with the text "\textbackslash"
      // (presumably so the output can be typeset in LaTeX -- confirm).
      line = line.replaceAll("\\\\", "\\\\textbackslash")
      println line
    }
  }
} catch (UnknownHostException exception) {
  // The host could not be resolved: print a placeholder instead of failing.
  println "FIXME: compiled without internet connection"
}

Output:

PubChem CID=5282253
Compound Complexity=510
Fingerprint (SubStructure Keys)=00000371E0723800000000000000000000000000000160...
  000000000000000000000000000000001E00100800000D28C18004020802C00200880220D208...
  000000002000000808818800080A001200812004400004D000988003BC7F020E800000000000...
  00000000000000000000000000000000
IUPAC Name (Allowed)=(2S,3S,4S)-3-(carboxymethyl)-4-[(1Z,3E,5R)-5-carboxy-1-me...
  thyl-hexa-1,3-dienyl]pyrrolidine-2-carboxylic acid
IUPAC Name (CAS-like Style)=(2S,3S,4S)-4-[(2Z,4E,6R)-6-carboxyhepta-2,4-dien-2...
  -yl]-3-(carboxymethyl)-2-pyrrolidinecarboxylic acid
IUPAC Name (Markup)=(2<I>S</I>,3<I>S</I>,4<I>S</I>)-4-[(2<I>Z</I>,4<I>E</I>,6<...
  I>R</I>)-6-carboxyhepta-2,4-dien-2-yl]-3-(carboxymethyl)pyrrolidine-2-carbox...
  ylic acid
IUPAC Name (Preferred)=(2S,3S,4S)-4-[(2Z,4E,6R)-6-carboxyhepta-2,4-dien-2-yl]-...
  3-(carboxymethyl)pyrrolidine-2-carboxylic acid
IUPAC Name (Systematic)=(2S,3S,4S)-3-(2-hydroxy-2-oxoethyl)-4-[(2Z,4E,6R)-6-me...
  thyl-7-oxidanyl-7-oxidanylidene-hepta-2,4-dien-2-yl]pyrrolidine-2-carboxylic...
   acid
IUPAC Name (Traditional)=(2S,3S,4S)-3-(carboxymethyl)-4-[(1Z,3E,5R)-5-carboxy-...
  1-methyl-hexa-1,3-dienyl]proline
InChI (Standard)=InChI=1S/C15H21NO6/c1-8(4-3-5-9(2)14(19)20)11-7-16-13(15(21)2...
  2)10(11)6-12(17)18/h3-5,9-11,13,16H,6-7H2,1-2H3,(H,17,18)(H,19,20)(H,21,22)/...
  b5-3+,8-4-/t9-,10+,11-,13+/m1/s1
InChIKey (Standard)=VZFRNCSOCOPNDB-AOKDLOFSSA-N
Log P (XLogP3-AA)=-1.3
Mass (Exact)=311.13688739
Molecular Formula=C15H21NO6
Molecular Weight=311.33
SMILES (Canonical)=CC(C=CC=C(C)C1CNC(C1CC(=O)O)C(=O)O)C(=O)O
SMILES (Isomeric)=C[C@H](/C=C/C=C(/C)\textbackslash[C@H]1CN[C@@H]([C@H]1CC(=O)...
  O)C(=O)O)C(=O)O
Topological (Polar Surface Area)=124
Weight (MonoIsotopic)=311.13688739