# grep the name list for SVM training: protein, cell, interaction, process
# 1. "grep" against >=2 gram terms
# 2. the regular expressions are in Perl format
# 3. In most cases, the "grep" is omitted; in a few cases, when pipe is used, the complete shell command is listed, e.g.
#      grep -P 'cell' | grep -P 'lines?$'
# 4. Option -v stands for "NOT"

#Protein/Gene Names:
'(channels?)$'
'(receptors?)$'
'(transporters?)$'
grep -P '((enzymes?)|(ases?))( (.|I{1,3}|(IV)))?(-\d)?$' | grep -vP '(coenzyme)|((\t| )(base|phase)s?( |$))|(release$)'
'^protein|proteins?$'
'(mutant )|(mutants?$)'
'complex(es)?$'
'CaM|hsp|(14-3-3|p53|p38|AChR|DnaK|EGFR|eNOS|eIF4H|ERK|GIRK|GRK|JNK|MAPK|SAPK|STAT|TFIIF)\d?$'
'G(i|o|s) ?(alpha|beta|gamma)'

#Cell Names:
'cells?$'
grep -P 'cell' | grep -P 'lines?$'
'((neuron)|(blast)|(cyte))s?$'

#Interaction Names:
'binding$'
'phosphorylation$'
'activations?$'
'inhibitions?$'
'(interaction)|(merization)|(recognition)|( action)s?$'
'hydrolysis$'

#Biological Process Names:
'(activit(y|(ies)))$'
'release$'
'transfer$'
'exchange$'
'expressions?$'
'growth$'
grep -P '((repair)|(damage)|(apoptosis)|(sion)|(tion)|(synthesis)|(influx)|(efflux)|(import)|(shock)|(transit)|(transport)|(uptake))$' | grep -vP ' cation$'
grep -P '^((ca2)|(calcium))' | grep -vP '(cyt)|(calmodulin)|(CaM)'