Freecontact/src/ write bioxsd.cpp
From Rost Lab Open
/* FreeContact - program to predict protein residue contacts from a sufficiently large multiple alignment * Copyright (C) 2012-2013 Laszlo Kajan <lkajan@rostlab.org> Rost Lab, Technical University of Munich, Germany * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <iostream> #include "fc.h" #include "freecontact_input_xsd.hxx" #include "freecontact_output_xsd.hxx" #include "freecontact.h" using namespace std; namespace bo = boost; namespace fc = freecontact; namespace fix = freecontact_input_xsd; namespace fox = freecontact_output_xsd; auto_ptr<BioXSD::Score> _get_bxScore(const fc::contact_t& __ct, const std::string& __scid); // forward declaration in freecontact.cpp std::string _xml_flatali(std::istream& __is, bool dbg) { string ret; try { auto_ptr<fix::FcAlignment> xml( fix::alignment(__is) ); ret = xml->flat(); // lkajan: this may be empty, as <alignment> is a choice of <flat>,... } catch (const xml_schema::exception& e) { cerr << e << "\n"; throw; } // return ret; } // forward declaration in freecontact.cpp void _write_bioxsd(const fc::ali_t& __ali, const resn_mapper_t &__resn, const map<string, vector<fc::contact_t> >& __res, bool dbg) { // XML generation - deepest first auto_ptr<BioXSD::GeneralSequenceSegment> seqSeg_p(new BioXSD::SequenceSegment()); seqSeg_p->min( __resn.qs ); seqSeg_p->max( __resn.qs+__resn.q.length()-1 ); auto_ptr<BioXSD::SequencePosition> seqPos_p(new BioXSD::SequencePosition()); seqPos_p->segment().push_back( seqSeg_p ); auto_ptr<BioXSD::SequenceReference> seqRef_p(new BioXSD::SequenceReference()); seqRef_p->subsequencePosition( seqPos_p ); string ucq(__resn.q); int (*tou_p)(int) = toupper; transform( __resn.q.begin(), __resn.q.end(), ucq.begin(), tou_p ); auto_ptr<BioXSD::BiosequenceRecord> seqRec_p( new BioXSD::GeneralAminoacidSequenceRecord( BioXSD::GeneralAminoacidSequence( ucq ))); seqRec_p->reference( seqRef_p ); //seqRec_p->name("query"); // lkajan: name is not mandatory auto_ptr<BioXSD::referenceSequence> refSeq_p(new BioXSD::referenceSequence()); refSeq_p->sequenceRecord( seqRec_p ); BioXSD::FeatureRecord featRec( refSeq_p ); auto_ptr<BioXSD::SemanticConcept> metCatCon_p(new BioXSD::SemanticConcept()); metCatCon_p->ontologyName("EDAM"); metCatCon_p->accession("operation_0272"); metCatCon_p->conceptUri("http://edamontology.org/operation_0272"); metCatCon_p->term("Residue interaction prediction"); auto_ptr<BioXSD::EntryReference> metCit_p(new BioXSD::EntryReference()); metCit_p->date(xml_schema::date(2013, 3, 1)); auto_ptr<BioXSD::method1> method_p(new BioXSD::method1("freecontact", "M#fc")); method_p->categoryConcept().push_back( metCatCon_p ); method_p->citation().push_back( metCit_p ); auto_ptr<BioXSD::scoreType> sc_l1_p(new BioXSD::scoreType("S#l1")); { auto_ptr<BioXSD::EntryReference> metCit_p(new BioXSD::EntryReference()); metCit_p->dbName("PubMed"); metCit_p->accession("22101153"); metCit_p->entryUri("http://www.ncbi.nlm.nih.gov/pubmed/22101153"); metCit_p->date(xml_schema::date(2011, 11, 17)); auto_ptr<BioXSD::Method> method_p(new BioXSD::Method("l1-norm")); method_p->citation().push_back( metCit_p ); sc_l1_p->method().push_back( method_p ); } auto_ptr<BioXSD::scoreType> sc_MI_p(new BioXSD::scoreType("S#MI")); { auto_ptr<BioXSD::EntryReference> metCit_p(new BioXSD::EntryReference()); metCit_p->dbName("PubMed"); metCit_p->accession("22163331"); metCit_p->entryUri("http://www.ncbi.nlm.nih.gov/pubmed/22163331"); metCit_p->date(xml_schema::date(2011, 12, 7)); auto_ptr<BioXSD::Method> method_p(new BioXSD::Method("Mutual information")); method_p->citation().push_back( metCit_p ); sc_MI_p->method().push_back( method_p ); } auto_ptr<BioXSD::scoreType> sc_fr_p(new BioXSD::scoreType("S#fr")); { auto_ptr<BioXSD::EntryReference> metCit_p(new BioXSD::EntryReference()); metCit_p->dbName("PubMed"); metCit_p->accession("23410359"); metCit_p->entryUri("http://www.ncbi.nlm.nih.gov/pubmed/23410359"); metCit_p->date(xml_schema::date(2013, 1, 11)); auto_ptr<BioXSD::Method> method_p(new BioXSD::Method("Frobenius norm")); method_p->citation().push_back( metCit_p ); sc_fr_p->method().push_back( method_p ); } auto_ptr<BioXSD::SemanticConcept> featCon_p(new BioXSD::SemanticConcept()); // TODO: find seq ontology term for contact //featCon_p->ontologyName("EDAM"); //featCon_p->accession("data_1547"); //featCon_p->conceptUri("http://edamontology.org/data_1547"); featCon_p->term("Proximity of residues (Cb-Cb<8A), 'contact'"); auto_ptr<BioXSD::FeatureType> feat_p(new BioXSD::FeatureType()); feat_p->name("Proximity of residues"); feat_p->equalConcept().push_back( featCon_p ); auto_ptr<BioXSD::condensedReferences1> cref_p(new BioXSD::condensedReferences1()); cref_p->methodIdRef("M#fc"); auto_ptr<BioXSD::annotation1> ann_p(new BioXSD::annotation1(feat_p)); ann_p->condensedReferences( cref_p ); // occurrences size_t l1norm_i = 0, MI_i = 0, fro_i = 0; for(size_t i = 0; i < __ali.alilen; ++i) for(size_t j = i+1; j < __ali.alilen; ++j) { // create only those positions that we have results for bool has_l1norm = false, has_MI = false, has_fro = false; if( ( has_l1norm = __res.at("l1norm")[l1norm_i].i == i && __res.at("l1norm")[l1norm_i].j == j ) | ( has_MI = __res.at("MI")[MI_i].i == i && __res.at("MI")[MI_i].j == j ) | ( has_fro = __res.at("fro")[fro_i].i == i && __res.at("fro")[fro_i].j == j )) { auto_ptr<BioXSD::GeneralSequencePoint> pt1_p(new BioXSD::SequencePoint()); pt1_p->pos( __resn(i) ); auto_ptr<BioXSD::GeneralSequencePoint> pt2_p(new BioXSD::SequencePoint()); pt2_p->pos( __resn(j) ); auto_ptr<BioXSD::GeneralSequencePosition> pos_p(new BioXSD::SequencePosition()); pos_p->point().push_back( pt1_p ); pos_p->point().push_back( pt2_p ); auto_ptr<BioXSD::occurrence> occ_p(new BioXSD::occurrence()); occ_p->position( pos_p ); // lkajan: option: put the scores in <evidence><predicted><score/>+</pr></ev> if(has_l1norm) occ_p->score().push_back( _get_bxScore( __res.at("l1norm")[l1norm_i++], "S#l1" )); if(has_MI) occ_p->score().push_back( _get_bxScore( __res.at("MI")[MI_i++], "S#MI" )); if(has_fro) occ_p->score().push_back( _get_bxScore( __res.at("fro")[fro_i++], "S#fr" )); ann_p->occurrence().push_back( occ_p ); } } auto_ptr<BioXSD::blockWithOccurrenceReferences> occRef_p(new BioXSD::blockWithOccurrenceReferences()); occRef_p->method().push_back( method_p ); occRef_p->scoreType().push_back( sc_l1_p ); occRef_p->scoreType().push_back( sc_MI_p ); occRef_p->scoreType().push_back( sc_fr_p ); occRef_p->annotation().push_back( ann_p ); featRec.blockWithOccurrenceReferences().push_back( occRef_p ); // serialize xml_schema::namespace_infomap map; map["fox"].name = "http://rostlab.org/freecontact/output"; map["fox"].schema = "http://rostlab.org/~lkajan/freecontact_output.xsd"; map["bx"].name = "http://bioxsd.org/BioXSD-1.1"; // lkajan: do not give this location, the information is redundant, it is in freecontact.xsd //map["bx"].schema = "http://rostlab.org/~lkajan/BioXSD-1.1-lkajan1.xsd"; fox::contactMap( cout, featRec, map ); //fox::contactMap( cout, featRec, map, "UTF-8", xml_schema::flags::dont_pretty_print ); } auto_ptr<BioXSD::Score> _get_bxScore( const fc::contact_t& __ct, const std::string& __scid ) { auto_ptr<BioXSD::Score> MI_score_p(new BioXSD::Score( bo::str(bo::format("%g") % __ct.score) )); MI_score_p->scoreTypeIdRef(__scid); return MI_score_p; } // vim:et:ts=4:ai: