#include <ncbi_pch.hpp>
#include "basedata.hpp"
#include "prosite.hpp"

#if defined(__DB_OFFLINE__)
#include "basealgo.hpp"
#else
#include <shlu2/BasicUtils/basealgo.hpp>
#endif
#include <sstream>

#include <regex>

using namespace LJSON;
using namespace std;

// -- definition of reading frame match
//enum EBlast4_frame_type {
//    eBlast4_frame_type_notset = 0,
//    eBlast4_frame_type_plus1  = 1,
//    eBlast4_frame_type_plus2  = 2,
//    eBlast4_frame_type_plus3  = 3,
//    eBlast4_frame_type_minus1 = 4,
//    eBlast4_frame_type_minus2 = 5,
//    eBlast4_frame_type_minus3 = 6
//};
// -- objects/blast/Blast4_frame_type_.hpp
// Display titles of the six reading frames, addressed by frame index
// (see Idx2Title). The order must stay in step with RF_IDS below.
const char * READINGFRAME::RF_TITLES[] = {"RF +1", "RF +2", "RF +3", "RF -1", "RF -2", "RF -3"};
// Signed frame ids addressed by the same frame index (see Idx2Id):
// positive ids come first, followed by the negative ones.
READINGFRAME::TFRAMEID READINGFRAME::RF_IDS[] = {1, 2, 3, -1, -2, -3};

// Converts a signed reading-frame id (+1..+3, -1..-3) into its table index.
// Any id outside those ranges (including 0) yields index 0, the first frame,
// because protein sequences use that slot.
READINGFRAME::TFRAMEINDEX READINGFRAME::Id2Idx(READINGFRAME::TFRAMEID id) noexcept
{
	const bool is_plus = (id >= 1 && id <= 3);
	const bool is_minus = (id <= -1 && id >= -3);

	if (is_plus)
		return id - 1;
	if (is_minus)
		return READINGFRAME::RF_SIZE - id - 1;	// negative ids follow the positive ones

	return 0;	// invalid id
}

// Looks up the signed frame id stored in RF_IDS for a frame index.
// Returns 0 when the index is not below TOTAL_RFS.
READINGFRAME::TFRAMEID READINGFRAME::Idx2Id(READINGFRAME::TFRAMEINDEX idx) noexcept
{
	if (idx < READINGFRAME::TOTAL_RFS)
	{
		return READINGFRAME::RF_IDS[idx];
	}
	return 0;
}


// Looks up the display title stored in RF_TITLES for a frame index.
// Returns nullptr when the index is not below TOTAL_RFS.
const char * READINGFRAME::Idx2Title(READINGFRAME::TFRAMEINDEX idx) noexcept
{
	if (idx < READINGFRAME::TOTAL_RFS)
	{
		return READINGFRAME::RF_TITLES[idx];
	}
	return nullptr;
}

// Returns the display title for a signed frame id by first converting the id
// to a frame index (invalid ids map to index 0) and then looking up the title.
const char * READINGFRAME::Id2Title(READINGFRAME::TFRAMEID id) noexcept
{
	const READINGFRAME::TFRAMEINDEX frame_index = READINGFRAME::Id2Idx(id);
	return READINGFRAME::Idx2Title(frame_index);
}




//SeqPos_t READINGFRAME::NA2Pr(SeqPos_t napos, int rf, SeqLen_t na_len)
//{
//	if (rf > 0)
//	{
//		// -- plus
//		if (rf > 3)
//			return napos;
//		return napos / READINGFRAME::RF_SIZE;
//	}
//	else if (rf < 0)
//	{
//		if (rf < -3)
//			return napos;
//		return (na_len - napos - 1) / READINGFRAME::RF_SIZE;
//	}
//	// -- rf = 0;
//	return napos;
//
//}
//
//SeqPos_t READINGFRAME::Pr2NA(SeqPos_t prpos, int rf, SeqLen_t pr_len)
//{
//	if (rf > 0)
//	{
//		// -- plus
//		if (rf > 3)
//			return prpos;
//		return prpos * READINGFRAME::RF_SIZE + rf - 1;
//	}
//	else if (rf < 0)
//	{
//		// -- minus
//		if (rf < -3)
//			return prpos;
//		return (pr_len - prpos - 1) * READINGFRAME::RF_SIZE - rf - 1;
//	}
//	// -- rf = 0;
//	return prpos;
//
//}


// Parallel tables describing the data modes; entry i of each table refers to
// the same mode. The literals rep/std/full are declared elsewhere.
const char * TDataModes::dimLits[] = {rep, std, full};
// Human-readable labels, same order as dimLits.
const char * TDataModes::dimLabels[] = {"Concise", "Standard", "Full"};
// Tag strings, same order as dimLits.
const char * TDataModes::dimTags[] = {"DATA_REP", "DATA_STD", "DATA_FULL"};
// Alias string for the standard mode (presumably accepted as a synonym by the
// input parser -- confirm against the code that reads it).
const char * TDataModes::e_std_alias = "all";



// Parallel tables describing the search modes; entry i of each table refers
// to the same mode. The literals prec/live are declared elsewhere.
const char * TSearchModes::dimLits[] = {prec, live};
// Human-readable labels, same order as dimLits.
const char * TSearchModes::dimLabels[] = {"Precomputed", "Live BLAST"};
// Tag strings, same order as dimLits.
const char * TSearchModes::dimTags[] = {"SEARCH_PREC", "SEARCH_LIVE"};


// Default per-source hit limits; the constructor copies this table into
// m_max_counts, which CountSrc compares its counters against.
array<int, TDomSrcCount::TOTALSRCS> TDomSrcCount::MAXCOUNTS = {1, 1, 1, 1, 1, 1, 1};

// Upper-case accession prefixes recognised by DomAccType. They are tried in
// this order and the index of the first match becomes the ESrcIdx value.
// CountSrc folds every entry from "PRK" onwards into the PRK counter.
array<const char * , TDomSrcCount::TOTALSIGS > TDomSrcCount::DOMSRCSIGS = {"CD", "PFAM", "TIGR", "COG", "KOG", "SMART", "PRK", "CHL", "MTH", "PHA", "PLN", "PTZ"};


// Classifies a domain accession by its source prefix.
//
// The accession is compared case-insensitively (its characters are upper-cased)
// against each entry of DOMSRCSIGS in table order. A prefix only qualifies when
// it is strictly shorter than the accession. Returns the index of the first
// matching prefix as ESrcIdx, or TOTALSIGS when none matches.
TDomSrcCount::ESrcIdx TDomSrcCount::DomAccType(const string &acxn)
{
	const size_t acxn_len = acxn.size();

	for (size_t sig = 0; sig < TOTALSIGS; ++sig)
	{
		const char * prefix = TDomSrcCount::DOMSRCSIGS[sig];
		const size_t prefix_len = strlen(prefix);

		if (prefix_len >= acxn_len)
			continue;	//accession must be longer than the prefix

		bool matched = true;
		for (size_t cidx = 0; matched && cidx < prefix_len; ++cidx)
		{
			matched = (toupper((unsigned char)acxn[cidx]) == (unsigned char)prefix[cidx]);
		}

		if (matched)
			return (ESrcIdx)sig;
	}
	return TOTALSIGS;
}

// Builds a counter whose limits start from MAXCOUNTS.
// When mcounts is non-null, its first nums entries (clamped to TOTALSRCS)
// override the leading default limits.
TDomSrcCount::TDomSrcCount(const int * mcounts, size_t nums):
	m_SrcCounter(), m_max_counts(TDomSrcCount::MAXCOUNTS)
{
	if (nullptr == mcounts)
		return;	//keep the defaults

	size_t usable = nums;
	if (usable > TOTALSRCS)
		usable = TOTALSRCS;

	for (size_t i = 0; i < usable; ++i)
		m_max_counts[i] = mcounts[i];
}


bool TDomSrcCount::CountSrc(const string &acxn)
{
	ESrcIdx srctype = DomAccType(acxn);
	if (TOTALSIGS == srctype) return true;	//unknown src always enter.
	if (srctype >= ePRK)
		srctype = ePRK;

	map<ESrcIdx, int> :: iterator iter = m_SrcCounter.emplace(srctype, 0).first;

	if (iter->second >= m_max_counts[srctype]) return false;
	++iter->second;
	return true;
}

// Derives a pseudo PSSM id from a domain accession.
//
// The id is INVALIDPSSMID + (source index * DOMSRCMUL) + N, where the source
// index comes from DomAccType and N is the first run of decimal digits found
// in the accession (0 if there is none). An empty accession yields 0.
//
// Characters are cast to unsigned char before being handed to isdigit():
// passing a negative plain char is undefined behaviour.
PssmId_t TDomSrcCount::ComputePseudoPssmId(const string &acxn)
{
	PssmId_t pseudo_pssmid = 0;
	if (!acxn.empty())
	{
		ESrcIdx srctype = DomAccType(acxn);
		pseudo_pssmid = INVALIDPSSMID + (PssmId_t)(srctype * DOMSRCMUL);

		size_t idx = 0, idxEnd = acxn.size();

		// -- skip everything in front of the first digit
		while (idx < idxEnd && !isdigit((unsigned char)acxn[idx]))
			++idx;

		// -- accumulate the first contiguous run of digits
		PssmId_t odr = 0;
		while (idx < idxEnd && isdigit((unsigned char)acxn[idx]))
		{
			odr = odr * 10 + (PssmId_t)(acxn[idx] - '0');
			++idx;
		}

		pseudo_pssmid += odr;
	}
	return pseudo_pssmid;
}




// Parallel tables for the target-data selection: literal and display label.
const char * TTargetData::dimLits[] = {"doms", "feats", "both"};
const char * TTargetData::dimLabels[] = {"Domain hits", "Site annotations", "Domain hits and site annotations"};
// -- added 5/11/2011 -- for blast use -- dart result string
// Status literals addressed by the TDartStatus enumeration; two slots are
// intentionally empty strings in this table.
const char * TDartStatus::dimLits[] =
{
	"no_connection",
	"unknown_id",
	"data_not_ready",
	"no_hits_found",
	"",
	"",
	"error",
	"unqualified"
};


// Parallel tables for the public databases: the literal used in
// GetFilterString() and the display name, in matching order.
const char* TPublicDBs::dimLits[] = {"cdd", "cdd_ncbi", "cdd_preview", "oasis_pfam", "oasis_smart", "oasis_kog", "oasis_cog", "oasis_prk", "oasis_tigr"};
const char* TPublicDBs::dimDispNames[] = {"CDD", "NCBI_Curated", "NCBI_Preview", "Pfam", "SMART", "KOG", "COG", "PRK", "TIGR"};

// Joins the first (eEnumStop - eEnumStart) database literals with '|'
// and returns the resulting string, e.g. "cdd|cdd_ncbi|...".
string TPublicDBs::GetFilterString(void)
{
	string joined(k_strEmptyString);
	joined.reserve(128);

	const int total = eEnumStop - eEnumStart;
	for (int i = 0; i < total; ++i)
	{
		if (i > 0)
			joined.push_back('|');	//separator goes between entries only
		joined.append(dimLits[i]);
	}
	return joined;
}


// Creates an empty (invalid) id that uses the default chain delimiter.
CPdbId::CPdbId(void)
	: m_usedelim(CPdbId::DELIM_CHAR)
{
	Reset();	//zero the molecule and chain buffers
}

// Creates an id from an accession string using the default chain delimiter.
// A string that fails to parse is not reported here: the parse result is
// discarded, and callers check the outcome with IsValid().
CPdbId::CPdbId(const string &acxn)
	: m_usedelim(CPdbId::DELIM_CHAR)
{
	Reset();
	ParsePdbAcxn(acxn);	//result intentionally ignored, no exception is thrown
}

// An id is valid when a molecule code has been stored, i.e. the molecule
// buffer does not start with the terminating null character.
bool CPdbId::IsValid(void) const
{
	return m_mol[0] != '\0';
}


// Renders the id as text.
//
// Returns the empty string for an invalid id. Otherwise the result is the
// molecule code, followed -- when a chain is present -- by the delimiter
// (only if m_usedelim is non-zero) and the chain. When the molecule code has
// the extended length PDBXMOL_LEN, the result is prefixed with PDBSIG;
// shorter (legacy) codes are returned without the prefix.
CPdbId::operator string(void) const
{
	if (!m_mol[0])
		return k_strEmptyString;

	string text(m_mol);
	const size_t mol_len = text.size();	//measured before the chain is appended

	if (m_chain[0])
	{
		if (m_usedelim)
			text.push_back(m_usedelim);
		text.append(m_chain);
	}

	if (PDBXMOL_LEN == mol_len)
		return PDBSIG + text;
	return text;	//legacy
}


// Parses a PDB accession string into the molecule and chain buffers.
//
// Accepted forms, as implemented below:
//   * legacy:    PDBMOL_LEN molecule characters starting with a digit,
//                optionally followed by a chain;
//   * extended:  "pdb_" (any case) followed by PDBXMOL_LEN molecule characters,
//                optionally followed by a chain. When the extended molecule is
//                a legacy code padded with leading zeros it is stored in its
//                legacy (short) form.
// The chain may be separated from the molecule by a single non-alphanumeric
// character. A lone trailing character of any kind is taken as the chain.
//
// acxn must be a null-terminated string. The len argument is not used; the
// length is re-measured with strlen so that parsing always stops at the first
// null character.
//
// Returns true on success. On failure returns false; failures detected after
// molecule parsing has started also Reset() the object.
//
// Changes compared with the previous version:
//   * characters are cast to unsigned char before isdigit()/isalnum(), which
//     have undefined behaviour for negative plain char values;
//   * the chain buffer is cleared before chain parsing, so re-parsing an id
//     without a chain on an object that previously held one no longer keeps
//     the old chain.
bool CPdbId::x_SetAcxn(const char * acxn, size_t len)
{
    size_t acxn_len = strlen(acxn);
    
    const char * legacy_start = acxn;
    
    if (acxn_len < PDBMOL_LEN)
        return false;

    if (acxn_len >= PDBXMOL_LEN + PDBSIG_LEN)   //long enough to be an extended id, check the signature
    {
        if (('p' == legacy_start[0] || 'P' == legacy_start[0])
            && ('d' == legacy_start[1] || 'D' == legacy_start[1])
            && ('b' == legacy_start[2] || 'B' == legacy_start[2])
            && ('_' == legacy_start[3]))
        {
            legacy_start = acxn + PDBSIG_LEN;
            
            // -- the character where a zero-padded legacy code would begin must be a digit
            if (isdigit((unsigned char)*(legacy_start + (PDBXMOL_LEN - PDBMOL_LEN))))   //possibly legacy-compatible
            {
                char dimZeroPfx[PDBXMOL_LEN - PDBMOL_LEN + 1] = {'\0'};
                memset(dimZeroPfx, '0', (PDBXMOL_LEN - PDBMOL_LEN) * sizeof (char));
                dimZeroPfx[PDBXMOL_LEN - PDBMOL_LEN] = 0;   //terminal nullchar
                
                if (0 == strncmp(legacy_start, dimZeroPfx, PDBXMOL_LEN - PDBMOL_LEN))
                {
                    // -- all padding characters are '0': store the legacy short mol
                    legacy_start += (PDBXMOL_LEN - PDBMOL_LEN);
                    
                    goto labelSetLegacyMol;
                }
            }
            // -- here set new id
            if (!x_StoreMol(legacy_start, PDBXMOL_LEN))
            {
                Reset();
                return false;
            }
            legacy_start += PDBXMOL_LEN;
            goto labelChainParse;
        }
    }
    
    // -- no signature: a legacy id must start with a digit
    if (!isdigit((unsigned char)legacy_start[0]))
        return false;
labelSetLegacyMol:
    if (!x_StoreMol(legacy_start, PDBMOL_LEN))
    {
        Reset();
        return false;
    }
    legacy_start += PDBMOL_LEN;
    
labelChainParse:
    // -- legacy_start points to the chain part.
    // -- drop any chain left over from a previous successful parse
    memset(m_chain, 0, (PDBCHN_MAX_LEN + 1) * sizeof(char));

    if (*legacy_start)  //has chain
    {
        char c = *legacy_start++;
        if (!isalnum((unsigned char)c))
        {
            if ('\0' == (*legacy_start))    //only one character, any character can be the chain
            {
                m_chain[0] = c;
                m_chain[1] = 0;
                goto labelReturn;
            }
                    
            c = *legacy_start++; //otherwise, treat it as a separator
        }
        
        m_chain[0] = c;
        size_t chain_char_idx = 1;   //at least one char;
        
        while (chain_char_idx < PDBCHN_MAX_LEN)
        {
            c = *legacy_start++;
            if (!c)
            {
                m_chain[chain_char_idx] = 0;
                goto labelReturn;
            }
            else if (!isalnum((unsigned char)c))
                break;  //not alphanumeric, invalid
            m_chain[chain_char_idx++] = c;
        }
        // -- too long (or contains a non-alphanumeric character), considered invalid.
        // NOTE(review): the loop ends as soon as PDBCHN_MAX_LEN characters have been
        // stored, without looking at the next character, so a chain of exactly
        // PDBCHN_MAX_LEN characters is rejected too -- confirm this is intended.
        Reset();
        return false;
        
    }
labelReturn:
    return true;
}
    
 

// Copies len molecule characters from src into m_mol, upper-casing ASCII
// lower-case letters, and null-terminates the buffer.
// Only ASCII digits and letters are accepted; on the first other character
// the function returns false, leaving the characters copied so far in m_mol
// (callers are expected to Reset()).
bool CPdbId::x_StoreMol(const char * src, size_t len)
{
	for (size_t pos = 0; pos < len; ++pos)
	{
		const char ch = src[pos];
		const bool is_digit = (ch >= '0' && ch <= '9');
		const bool is_upper = (ch >= 'A' && ch <= 'Z');
		const bool is_lower = (ch >= 'a' && ch <= 'z');

		if (!(is_digit || is_upper || is_lower))
			return false;

		m_mol[pos] = is_lower ? ch - 0x20 : ch;	//0x20 is the ASCII case offset
	}
	m_mol[len] = 0;
	return true;
}


// Parses a PDB accession held in a std::string. Returns true on success.
bool CPdbId::ParsePdbAcxn(const string & acxn)
{
	const char * text = acxn.c_str();
	const size_t text_len = acxn.size();
	return x_SetAcxn(text, text_len);
}

// Parses a PDB accession held in a null-terminated C string.
// Returns true on success. A null pointer is rejected (returns false, object
// left untouched) instead of being passed to strlen(), which would be
// undefined behaviour.
bool CPdbId::ParsePdbAcxn(const char * acxn)
{
	if (nullptr == acxn)
		return false;
	return x_SetAcxn(acxn, strlen(acxn));
}


// Zero-fills the molecule and chain buffers (including their terminators),
// which makes the id invalid. The delimiter setting is left unchanged.
void CPdbId::Reset(void)
{
	const size_t mol_bytes = (PDBXMOL_LEN + 1) * sizeof(char);
	const size_t chain_bytes = (PDBCHN_MAX_LEN + 1) * sizeof(char);

	memset(m_mol, 0, mol_bytes);
	memset(m_chain, 0, chain_bytes);
}

// Serializes the id: delimiter first, then the full molecule buffer, then the
// full chain buffer. RestoreFromCache reads them back in the same order.
void CPdbId::SaveToCache(CDataCache &dc) const
{
	const size_t mol_bytes = (CPdbId::PDBXMOL_LEN + 1) * sizeof(char);
	const size_t chain_bytes = (CPdbId::PDBCHN_MAX_LEN + 1) * sizeof(char);

	dc << m_usedelim;
	dc.PushData(m_mol, mol_bytes);
	dc.PushData(m_chain, chain_bytes);
}
// Deserializes the id in the order written by SaveToCache: delimiter,
// full molecule buffer, full chain buffer.
void CPdbId::RestoreFromCache(CDataCache &dc)
{
	const size_t mol_bytes = (CPdbId::PDBXMOL_LEN + 1) * sizeof(char);
	const size_t chain_bytes = (CPdbId::PDBCHN_MAX_LEN + 1) * sizeof(char);

	dc >> m_usedelim;
	dc.ReadData(m_mol, mol_bytes);
	dc.ReadData(m_chain, chain_bytes);
}

// Returns every docsum field to its default value.
// Defaults of note: genetic code 1, mitochondrial genetic code 2,
// preferred tax id -1 (AmendJson only emits it when it is >= 0).
void TDocsum::Reset(void)
{
	// -- identifiers and description
	m_iGi = 0;
	m_strAccession.clear();
	m_strNcbiId.clear();
	m_strTitle.clear();
	m_strSourceDb.clear();	//source db, friendly name in PigPen..PgDBSrc

	// -- sequence properties
	m_uiSeqLen = 0;
	m_bIsNa = false;

	// -- taxonomy
	m_iTaxId = 0;
	m_iGenCode = 1;
	m_iMGenCode = 2;	//mitochondrial genetic code
	m_strSciName.clear();	//scientific name
	m_strBlastName.clear();	//blast name
	m_strCommonName.clear();	//common name
	m_iPrefTaxId = -1;
	m_strPrefTaxName.clear();
}

// Adds the docsum properties to an existing JSON object.
// Optional properties are written only when they carry a value (positive
// numbers, non-empty strings, preferred tax id >= 0); the sequence type
// ("na" or "aa") is always written.
void TDocsum::AmendJson(JSVar dsjson) const
{
	if (m_iGi > 0)
	{
		dsjson[_PROP_GI] = m_iGi;
	}
	if (!m_strAccession.empty())
	{
		dsjson[_PROP_ACC] = m_strAccession;
	}
	if (!m_strNcbiId.empty())
	{
		dsjson[_PROP_LABEL] = m_strNcbiId;
	}
	if (m_uiSeqLen > 0)
	{
		dsjson[_PROP_SZ] = m_uiSeqLen;
	}
	if (!m_strTitle.empty())
	{
		dsjson[_PROP_DEFLINE] = m_strTitle;
	}
	if (m_iTaxId > 0)
	{
		dsjson[_PROP_TAXID] = m_iTaxId;
	}
	if (m_iPrefTaxId >= 0)
	{
		dsjson[_PROP_PREFTAXID] = m_iPrefTaxId;
	}
	if (!m_strPrefTaxName.empty())
	{
		dsjson[_PROP_PREFTAXNAME] = m_strPrefTaxName;
	}

	dsjson[_PROP_SEQTYPE] = (m_bIsNa ? "na" : "aa");
}

// Builds a new JSON object holding the docsum properties (see AmendJson).
JSVar TDocsum::CreateJson(void) const
{
	JSVar summary(eObject);
	AmendJson(summary);
	return summary;
}

// Serializes the docsum fields. The field order here defines the cache layout
// and must match RestoreFromCache exactly.
// NOTE(review): m_strSourceDb is cleared by Reset() but is not part of the
// cached fields -- confirm that is intended.
void TDocsum::SaveToCache(CDataCache &dc) const
{
	// -- identifiers, length and title
	dc << m_iGi << m_strAccession << m_strNcbiId;
	dc << m_uiSeqLen << m_strTitle << m_bIsNa << m_iTaxId;
	// -- genetic codes and taxonomy names
	dc << m_iGenCode << m_iMGenCode;
	dc << m_strSciName << m_strBlastName << m_strCommonName;
	dc << m_iPrefTaxId << m_strPrefTaxName;
}
// Deserializes the docsum fields in exactly the order SaveToCache wrote them.
void TDocsum::RestoreFromCache(CDataCache &dc)
{
	// -- identifiers, length and title
	dc >> m_iGi >> m_strAccession >> m_strNcbiId;
	dc >> m_uiSeqLen >> m_strTitle >> m_bIsNa >> m_iTaxId;
	// -- genetic codes and taxonomy names
	dc >> m_iGenCode >> m_iMGenCode;
	dc >> m_strSciName >> m_strBlastName >> m_strCommonName;
	dc >> m_iPrefTaxId >> m_strPrefTaxName;
}



// Returns the sequence record, including its docsum part, to defaults.
// The default range is m_iFrom = 0 and m_iTo = -1, which means the whole
// sequence length.
// NOTE(review): m_OriDefline is serialized by SaveToCache/RestoreFromCache
// but is not cleared here -- confirm whether it should be.
void TSequence::Reset(void)
{
	TDocsum::Reset();

	// -- input and sequence data
	m_iInputType = 0;	//CCleanInput::EStringDataType values
	m_strCleanedInput.clear();	//also from CCleanInput
	m_strSeqData.clear();	//1-letter seqdata
	m_Src.clear();
	m_B64PackedIds.clear();

	// -- status
	m_iValid = e_Invalid;
	m_iStatus = 0;	//misc status for compatibility, previous field name is m_iOid
	m_iPig = 0;	//if has one. otherwise 0

	// -- range (added 2012/4/12)
	m_iFrom = 0;
	m_iTo = -1;

	m_vecMaskedRegions.clear();
}

// Adds the sequence properties to an existing JSON object, on top of the
// docsum properties written by TDocsum::AmendJson.
//   * validity is always written; PIG only when positive;
//   * the from/to range is written when it differs from the default
//     (from > 0 or to >= 0); a negative "to" is reported as length - 1;
//   * sequence data is written when present, and its size is used as the
//     length when the docsum length is 0;
//   * masked regions are written as an array of {from, to, rf} objects.
void TSequence::AmendJson(JSVar dsjson) const
{
	TDocsum::AmendJson(dsjson);
	dsjson[_PROP_VALID] = m_iValid;

	if (m_iPig > 0)
		dsjson[_PROP_PIG] = m_iPig;

	const bool has_range = (m_iFrom > 0 || m_iTo >= 0);
	if (has_range)
	{
		dsjson[_PROP_FROM] = m_iFrom;
		dsjson[_PROP_TO] = m_iTo < 0 ? m_uiSeqLen - 1 : m_iTo;
	}

	if (!m_strSeqData.empty())
	{
		dsjson[_PROP_SEQDATA] = m_strSeqData;

		// -- override docsum if length is 0
		if (0 == m_uiSeqLen)
			dsjson[_PROP_SZ] = m_strSeqData.size();
	}

	// -- mask regions
	if (!m_vecMaskedRegions.empty())
	{
		JSVar masked(eArray);
		for (const auto &region : m_vecMaskedRegions)
		{
			JSVar entry(eObject);
			entry[_PROP_FROM] = region.from;
			entry[_PROP_TO] = region.to;
			entry[_PROP_RFID] = region.rf;
			masked.push(entry);
		}
		dsjson[_PROP_MASKED] = masked;
	}
}

// Builds a new JSON object holding the sequence properties (see AmendJson).
JSVar TSequence::CreateJson(void) const
{
	JSVar sequence(eObject);
	AmendJson(sequence);
	return sequence;
}

// Serializes the sequence record: docsum part first, then the sequence
// fields, then the masked regions. The order defines the cache layout and
// must match RestoreFromCache exactly.
void TSequence::SaveToCache(CDataCache &dc) const
{
	TDocsum::SaveToCache(dc);

	dc << m_iInputType << m_strCleanedInput;
	dc << m_strSeqData << m_Src << m_B64PackedIds << m_OriDefline;
	dc << m_iValid << m_iStatus << m_iPig << m_iFrom << m_iTo;

	CachePODData(dc, m_vecMaskedRegions);
}
// Deserializes the sequence record in exactly the order SaveToCache wrote it:
// docsum part, sequence fields, masked regions.
void TSequence::RestoreFromCache(CDataCache &dc)
{
	TDocsum::RestoreFromCache(dc);

	dc >> m_iInputType >> m_strCleanedInput;
	dc >> m_strSeqData >> m_Src >> m_B64PackedIds >> m_OriDefline;
	dc >> m_iValid >> m_iStatus >> m_iPig >> m_iFrom >> m_iTo;

	RestorePODData(dc, m_vecMaskedRegions);
}


// Fallback title returned by GetFeatType when a type index is unknown.
const char * TDomSite::GENERIC_SITE_TITLE = "active site";
// Built-in feature type names, indexed by feature type; used by GetFeatType
// when m_stFeatTypes has no entry for the index.
const char * TDomSite::FEATTYPES[] = {"other", "active site", "polypeptide binding site", "nucleotide binding site", "ion binding site", "chemical binding site", "posttranslational modification", "structural motif"};

// Feature type names keyed by type index; consulted by GetFeatType before
// the built-in FEATTYPES table. Filled elsewhere.
map<int, string> TDomSite::m_stFeatTypes;

const char * TDomSite::GetFeatType(int idx)
{
	if (!TDomSite::m_stFeatTypes.empty())
	{
		map<int, string> :: const_iterator iter = TDomSite::m_stFeatTypes.find(idx);
		if (TDomSite::m_stFeatTypes.end() != iter)
			return iter->second.c_str();
	}

	if (idx >= 0 && idx < TOTAL_OFFL_TYPES)
		return FEATTYPES[idx];
	return GENERIC_SITE_TITLE;
}

// Expands a segment set into two parallel JSON arrays, one entry per residue:
// the residue position itself and the value segset.GetOriTo() reports for it.
// Both arrays are returned inside one JSON object.
JSVar TDomSite::CreateLocs(const CSegSet &segset)
{
	JSVar mapArrays(eObject), coords(eArray), oriCoords(eArray);
	mapArrays[_PROP_COORDS] = coords;
	mapArrays[_PROP_ORICOORDS] = oriCoords;

	const CSegSet::TSegs &segs = segset.GetSegs();
	const CSegSet::TSegs::const_iterator last = segs.end();

	for (CSegSet::TSegs::const_iterator seg = segs.begin(); seg != last; ++seg)
	{
		SeqPos_t pos = seg->from;
		while (pos <= seg->to)
		{
			coords.push(pos);
			oriCoords.push(segset.GetOriTo(seg, pos));
			++pos;
		}
	}
	return mapArrays;
}

// Fills dst with every residue position covered by this site's segments,
// in segment order. Any previous content of dst is discarded.
void TDomSite::GetMotifResPos(vector<SeqPos_t> &dst) const
{
	// -- first pass: count residues so the vector is allocated once
	size_t total_residues = 0;
	for (const auto &seg : m_lstContainer)
		total_residues += seg.to - seg.from + 1;

	dst.clear();
	dst.reserve(total_residues);

	// -- second pass: emit the positions
	for (const auto &seg : m_lstContainer)
	{
		for (SeqPos_t pos = seg.from; pos <= seg.to; ++pos)
			dst.push_back(pos);
	}
}

// Clears the segment set and returns all site fields to their defaults
// (factor 1, everything else empty or 0).
void TDomSite::Reset(void)
{
	CSegSet::Clear();
	m_iFactor = 1;

	// -- text fields
	m_strTitle.clear();
	m_strDescr.clear();
	m_strMotif.clear();

	// -- numeric fields
	m_iMotifuse = 0;
	m_iIndex = 0;
	m_iType = 0;
	m_iCompleteSize = 0;
	m_flags = 0;
}

// Writes every residue position covered by this site's segments into a JSON
// array and stores it in dsjson under _PROP_COORDS.
void TDomSite::AddOriCoords(LJSON::JSVar dsjson) const
{
	JSVar positions(eArray);

	const CSegSet::TSegs &segs = GetSegs();
	for (const auto &seg : segs)
	{
		for (SeqPos_t pos = seg.from; pos <= seg.to; ++pos)
			positions.push(pos);
	}

	dsjson[_PROP_COORDS] = positions;
}

// Adds the site properties to an existing JSON object: title, description
// (only when non-empty), feature type name, index and complete size.
void TDomSite::AmendJson(JSVar dsjson) const
{
	dsjson[_PROP_TITLE] = m_strTitle;

	if (!m_strDescr.empty())
	{
		dsjson[_PROP_DESCR] = m_strDescr;
	}

	const char * type_name = TDomSite::GetFeatType(m_iType);
	dsjson[_PROP_TYPE] = type_name;
	dsjson[_PROP_IDX] = m_iIndex;
	dsjson[_PROP_SZ] = m_iCompleteSize;
}

// Builds a new JSON object with the site properties (AmendJson) followed by
// the residue coordinates (AddOriCoords).
JSVar TDomSite::CreateJson(void) const
{
	JSVar site(eObject);
	AmendJson(site);
	AddOriCoords(site);
	return site;
}

int TDomSite::MotifCheck(const vector<TSeg_base::TResiduePos> &rMappedRes, const string &aaSeq) const
{
	static vector<SeqPos_t> st_motif_pos;

	if (!m_strMotif.empty())
	{
		if (st_motif_pos.empty())
			GetMotifResPos(st_motif_pos);

		//vector<TSeg_base::TResiduePos> vecOriPoses;
		//GetTranslatedPosMap(seqLen, vecOriPoses);

		CProSite ps;
		size_t errPos;
		CProSite::EParseError err = ps.Parse(m_strMotif, errPos);
		if (CProSite::eNoError != err)
		{
			THROW_SIMPLE("Motif string parse error -- Motif = " << m_strMotif << ", error position: " << errPos);


			//stringstream ss;
			//ss << "Motif string parse error -- Motif = " << m_strMotif << ", error position: " << errPos;
			//throw CSimpleException(__FILE__, __LINE__, ss.str());
		}
		string minMap(k_strEmptyString);
		ps.GetMinimalXMap(minMap);

		size_t mtfIdx = 0, mtfLen = minMap.size();
		size_t mapIdx = 0, mappedLen = rMappedRes.size();
//		assert(mtfLen == vecOriPoses.size());

		while (mtfIdx < mtfLen)
		{
			while (mapIdx < mappedLen)
			{
				if (rMappedRes[mapIdx].ori > st_motif_pos[mtfIdx])	//failed
					return 1;
				else if (rMappedRes[mapIdx].ori == st_motif_pos[mtfIdx])
				{
					++mapIdx;
					goto labelContinue;
				}
				++mapIdx;

			}
			// -- exhausted, fail
			return 1;
		labelContinue:
			++mtfIdx;
		}

		size_t seqLen = aaSeq.size();
		if (!aaSeq.empty())
		{
			minMap.clear();	//borrow this for other use

			for (size_t i = 0; i < mappedLen; ++i)
				if ((size_t)rMappedRes[i].curr < seqLen)
				{
					minMap.push_back(aaSeq[rMappedRes[i].curr]);
				}

			size_t endPos = ps.Match(minMap, seqLen, 0);
			if (string::npos == endPos)
			{
				return 2;
			}
		}
	}

	return 0;
}

// Returns the ratio of the residues present in segset to this site's complete
// size. No check is made for m_iCompleteSize being 0; the floating-point
// division is performed as is.
double TDomSite::EvaluateCompleteness(const CSegSet & segset) const
{
	const double present = (double)(segset.GetTotalResidues());
	const double expected = (double)m_iCompleteSize;
	return present / expected;
}

// Serializes the site: segment set first, then the text fields, then the
// numeric fields. The order must match RestoreFromCache exactly.
void TDomSite::SaveToCache(CDataCache &dc) const
{
	CSegSet::SaveToCache(dc);

	dc << m_strTitle << m_strDescr << m_strMotif;
	dc << m_iMotifuse << m_iIndex << m_iType << m_iCompleteSize << m_flags;
}
// Deserializes the site in exactly the order SaveToCache wrote it:
// segment set, text fields, numeric fields.
void TDomSite::RestoreFromCache(CDataCache &dc)
{
	CSegSet::RestoreFromCache(dc);

	dc >> m_strTitle >> m_strDescr >> m_strMotif;
	dc >> m_iMotifuse >> m_iIndex >> m_iType >> m_iCompleteSize >> m_flags;
}


// Tells whether an accession starts with the given signature.
// When the accession contains a ':', the comparison starts right after the
// first ':'; otherwise it starts at the beginning. The comparison is
// case-sensitive.
bool DomAcxnSig(const string &acxn, const char * sig)
{
	const size_t colon = acxn.find(':');
	const size_t start = (string::npos == colon) ? 0 : colon + 1;
	const size_t sig_len = strlen(sig);

	return acxn.compare(start, sig_len, sig) == 0;
}

// Creates an empty cluster: zero PSSM id and length, invalid cluster id and
// empty strings.
TCluster::TCluster(void)
	: m_uiPssmId(0),
	  m_uiLength(0),
	  m_iClusterId(INVALIDCLUSTERID),
	  m_strAccession(k_strEmptyString),
	  m_strShortName(k_strEmptyString),
	  m_strTitle(k_strEmptyString),
	  m_strDefline(k_strEmptyString)
{
}

// Returns the cluster to the state produced by the default constructor.
void TCluster::Reset(void)
{
	// -- ids and size
	m_uiPssmId = 0;
	m_uiLength = 0;
	m_iClusterId = INVALIDCLUSTERID;

	// -- text fields
	m_strAccession.clear();
	m_strShortName.clear();
	m_strTitle.clear();
	m_strDefline.clear();
}

// Formats the cluster accession "clNNNNN" (cluster id, zero-padded to at
// least five digits) into buf and returns buf.
// When buf is nullptr a function-local static buffer is used instead; the
// returned text is then overwritten by the next such call.
const char*  TCluster::ConstructClusterAccession(char *buf) const
{
	static char stbuf[16];	//just for safety

	char * target = (nullptr == buf) ? stbuf : buf;
	sprintf(target, "cl%05d", m_iClusterId);
	return target;
}

// Adds the cluster properties to an existing JSON object: PSSM id, short
// name, title, defline, length, cluster id and accession.
// When the cluster has no title, one is derived from the defline with
// GetShortDomainDefline.
//
// The PSSM id used to be assigned twice with the same value; the redundant
// second assignment has been removed.
void TCluster::AmendJson(JSVar dsjson) const
{
	dsjson[_PROP_PSSMID] = m_uiPssmId;

	string ttl(m_strTitle);
	if (ttl.empty())
		GetShortDomainDefline(m_strDefline, ttl);

	dsjson[_PROP_SNAME] = m_strShortName;
	dsjson[_PROP_TITLE] = ttl;
	dsjson[_PROP_DEFLINE] = m_strDefline;
	dsjson[_PROP_SZ] = m_uiLength;
	dsjson[_PROP_CLID] = m_iClusterId;
	dsjson[_PROP_ACC] = m_strAccession;
}

// Builds a new JSON object holding the cluster properties (see AmendJson).
JSVar TCluster::CreateJson(void) const
{
	JSVar cluster(eObject);
	AmendJson(cluster);
	return cluster;
}

// Serializes the cluster fields: ids and length first, then the strings.
// The order must match RestoreFromCache exactly.
void TCluster::SaveToCache(CDataCache &dc) const
{
	dc << m_uiPssmId << m_uiLength << m_iClusterId;
	dc << m_strAccession << m_strShortName << m_strTitle << m_strDefline;
}
// Deserializes the cluster fields in exactly the order SaveToCache wrote them.
void TCluster::RestoreFromCache(CDataCache &dc)
{
	dc >> m_uiPssmId >> m_uiLength >> m_iClusterId;
	dc >> m_strAccession >> m_strShortName >> m_strTitle >> m_strDefline;
}



// Creates an empty domain: default cluster part, zero hierarchy/cluster ids
// and bit score, all flags false, empty strings and feature lists.
TDomain::TDomain(void)
	: TCluster(),
	  m_uiHierarchyRoot(0),
	  m_uiHierarchyParent(0),
	  m_uiClusterPssmId(0),
	  m_dMinBitScore(0.0),
	  m_bCurated(false),
	  m_bIsStructDom(false),
	  m_bMultiDom(false),
	  m_strConsensus(),
	  m_strSource(),
	  m_lstSpecificFeatures(),
	  m_lstGenericFeatures()
{
}

// Returns the domain, including its cluster part, to the state produced by
// the default constructor.
void TDomain::Reset(void)
{
	TCluster::Reset();

	// -- hierarchy and cluster links
	m_uiHierarchyRoot = 0;	//root pssmid
	m_uiHierarchyParent = 0;	//parent pssmid
	m_uiClusterPssmId = 0;

	// -- score and flags
	m_dMinBitScore = 0.0;
	m_bCurated = false;
	m_bIsStructDom = false;
	m_bMultiDom = false;

	// -- text and features
	m_strConsensus.clear();
	m_strSource.clear();
	m_lstSpecificFeatures.clear();
	m_lstGenericFeatures.clear();
}


// Adds the domain properties to an existing JSON object, on top of the
// cluster properties written by TCluster::AmendJson.
// When include_feats is true, two arrays are added as well: the generic and
// the specific site features, each site rendered with TDomSite::CreateJson.
void TDomain::AmendJson(LJSON::JSVar dsjson, bool include_feats) const
{
	TCluster::AmendJson(dsjson);

	dsjson[_PROP_ISNCBI] = m_bCurated;
	dsjson[_PROP_ISMULTI] = m_bMultiDom;
	dsjson[_PROP_ISSD] = m_bIsStructDom;
	dsjson[_PROP_MINBSCORE] = m_dMinBitScore;
	dsjson[_PROP_ROOT] = m_uiHierarchyRoot;
	dsjson[_PROP_CONSENSUS] = m_strConsensus;

	if (!include_feats)
		return;

	// -- the arrays are attached first and filled afterwards
	JSVar gen_feats(eArray), sp_feats(eArray);
	dsjson[_PROP_GENFTS] = gen_feats;
	dsjson[_PROP_SPFTS] = sp_feats;

	for (const TDomSite & specific : m_lstSpecificFeatures)
	{
		sp_feats.push(specific.CreateJson());
	}
	for (const TDomSite & generic : m_lstGenericFeatures)
	{
		gen_feats.push(generic.CreateJson());
	}
}

// Builds a new JSON object holding the domain properties (see AmendJson).
// Site features are included only when include_feats is true; an earlier,
// unconditional way of adding them was disabled because they are usually not
// needed.
JSVar TDomain::CreateJson(bool include_feats) const
{
	JSVar domain(eObject);
	AmendJson(domain, include_feats);
	return domain;
}

// Returns a JSON array with one entry (TDomSite::CreateJson) per feature of
// the selected list: the specific features when is_spec is true, the generic
// features otherwise.
JSVar TDomain::CreateFeatureArray(bool is_spec) const
{
	const list<TDomSite> &selected = is_spec ? m_lstSpecificFeatures : m_lstGenericFeatures;

	JSVar feats(eArray);
	for (const TDomSite &site : selected)
	{
		feats.push(site.CreateJson());
	}
	return feats;
}

// Serializes the domain: cluster part, scalar and string fields, then the
// specific and generic feature lists. The order must match RestoreFromCache
// exactly.
void TDomain::SaveToCache(CDataCache &dc) const
{
	TCluster::SaveToCache(dc);

	dc << m_uiHierarchyRoot << m_uiHierarchyParent << m_uiClusterPssmId;
	dc << m_dMinBitScore << m_bCurated << m_bIsStructDom << m_bMultiDom;
	dc << m_strConsensus << m_strSource;

	CacheArray(dc, m_lstSpecificFeatures);
	CacheArray(dc, m_lstGenericFeatures);
}

// Deserializes the domain in exactly the order SaveToCache wrote it:
// cluster part, scalar and string fields, specific then generic features.
void TDomain::RestoreFromCache(CDataCache &dc)
{
	TCluster::RestoreFromCache(dc);

	dc >> m_uiHierarchyRoot >> m_uiHierarchyParent >> m_uiClusterPssmId;
	dc >> m_dMinBitScore >> m_bCurated >> m_bIsStructDom >> m_bMultiDom;
	dc >> m_strConsensus >> m_strSource;

	RestoreArray(dc, m_lstSpecificFeatures);
	RestoreArray(dc, m_lstGenericFeatures);
}


// Resets all four lookup indexes, domain indexes first, then the
// cluster (family) indexes.
void CDomClusterIndex::Reset(void)
{
	// -- domain indexes: by pssmid and by accession
	m_pssmid2cd.Reset();
	m_acxn2cd.Reset();

	// -- cluster indexes: by pssmid and by cluster id
	m_pssmid2fam.Reset();
	m_clid2fam.Reset();
}

// Registers a domain in both domain indexes (by pssmid and by accession).
// The "inserted" flag reported by each index is ignored.
void CDomClusterIndex::InsertDomainIdx(TDomain *p)
{
	bool inserted = false;	//out-parameter, value not used

	m_pssmid2cd.Insert(p, inserted);
	m_acxn2cd.Insert(p, inserted);
}
// Registers a cluster in both cluster indexes (by pssmid and by cluster id).
// The "inserted" flag reported by each index is ignored.
void CDomClusterIndex::InsertClusterIdx(TCluster *p)
{
	bool inserted = false;	//out-parameter, value not used

	m_pssmid2fam.Insert(p, inserted);
	m_clid2fam.Insert(p, inserted);
}

// Resets all four architecture indexes, the specific-architecture indexes
// first, then the family-architecture indexes.
void CArchIndex::Reset(void)
{
	// -- specific architectures: by id and by string
	m_id2sp.Reset();
	m_str2sp.Reset();

	// -- family architectures: by id and by string
	m_id2fam.Reset();
	m_str2fam.Reset();
}

// Registers a specific architecture in both of its indexes (by id and by
// string). The "inserted" flag reported by each index is ignored.
void CArchIndex::InsertSpArchIdx(TSpDomArch *p)
{
	bool inserted = false;	//out-parameter, value not used

	m_id2sp.Insert(p, inserted);
	m_str2sp.Insert(p, inserted);
}
// Registers a family architecture in both of its indexes (by id and by
// string). The "inserted" flag reported by each index is ignored.
void CArchIndex::InsertFamArchIdx(TDomArch *p)
{
	bool inserted = false;	//out-parameter, value not used

	m_id2fam.Insert(p, inserted);
	m_str2fam.Insert(p, inserted);
}

// Returns the architecture to its empty state: id 0 and empty strings.
void TDomArch::Reset(void)
{
	m_strReviewLevel.clear();
	m_strArchString.clear();
	m_uiArchId = 0;
}

// Returns the specific architecture, including its TDomArch part, to its
// empty state.
void TSpDomArch::Reset(void)
{
	TDomArch::Reset();

	m_uiSupFamArchId = 0;
	m_strName.clear();
	m_strLabel.clear();
}

// Adds the architecture id and architecture string to an existing JSON
// object. The review level is not written.
void TDomArch::AmendJson(JSVar dsjson) const
{
	dsjson[_PROP__ID] = m_uiArchId;
	dsjson[_PROP_ARCHSTR] = m_strArchString;
}

// Builds a new JSON object holding the architecture properties (see AmendJson).
JSVar TDomArch::CreateJson(void) const
{
	JSVar arch(eObject);
	AmendJson(arch);
	return arch;
}


// Serializes the architecture: id, architecture string, review level.
// The order must match RestoreFromCache exactly.
void TDomArch::SaveToCache(CDataCache &dc) const
{
	dc << m_uiArchId;
	dc << m_strArchString << m_strReviewLevel;
}
// Deserializes the architecture in the order SaveToCache wrote it:
// id, architecture string, review level.
void TDomArch::RestoreFromCache(CDataCache &dc)
{
	dc >> m_uiArchId;
	dc >> m_strArchString >> m_strReviewLevel;
}



// Adds the specific-architecture properties to an existing JSON object, on
// top of those written by TDomArch::AmendJson: name, label (stored under the
// title property) and, when positive, the superfamily architecture id.
void TSpDomArch::AmendJson(JSVar dsjson) const
{
	TDomArch::AmendJson(dsjson);

	dsjson[_PROP_NAME] = m_strName;
	dsjson[_PROP_TITLE] = m_strLabel;

	const bool has_superfamily = (m_uiSupFamArchId > 0);
	if (has_superfamily)
	{
		dsjson[_PROP_SFARCHID] = m_uiSupFamArchId;
	}
}

// Builds a new JSON object holding the specific-architecture properties
// (see AmendJson).
JSVar TSpDomArch::CreateJson(void) const
{
	JSVar sparch(eObject);
	AmendJson(sparch);
	return sparch;
}

// Serializes the specific architecture: the TDomArch part first, then name,
// label and superfamily architecture id.
void TSpDomArch::SaveToCache(CDataCache &dc) const
{
	TDomArch::SaveToCache(dc);

	dc << m_strName << m_strLabel;
	dc << m_uiSupFamArchId;
}

// Deserializes the specific architecture in the order SaveToCache wrote it:
// the TDomArch part first, then name, label and superfamily architecture id.
//
// The base-class part used to be skipped here although SaveToCache writes it,
// so the derived fields were read from the wrong place in the stream and the
// base fields were never restored.
void TSpDomArch::RestoreFromCache(CDataCache &dc)
{
	TDomArch::RestoreFromCache(dc);
	dc >> m_strName >> m_strLabel >> m_uiSupFamArchId;
}




// Formats an E-value into buf, picking the notation by magnitude:
//   below 1e-180  exponent form without decimals ("%.0e")
//   below 0.01    exponent form with two decimals ("%.2e")
//   below 1       fixed with two decimals
//   below 10      fixed with one decimal
//   otherwise     fixed without decimals
// buf must be large enough for the result; no length check is done.
void TSeqAlignment::PrintEValue(char *buf, double eval)
{
	const char * format = "%.0f";

	if (eval < 1.0e-180)
		format = "%.0e";
	else if (eval < 0.01)
		format = "%.2e";
	else if (eval < 1.0)
		format = "%.2f";
	else if (eval < 10.0)
		format = "%.1f";

	sprintf(buf, format, eval);
}

// Formats a bit score into buf:
//   negative       "NA"
//   above 9999     exponent form with two decimals
//   above 99.9     integer part only
//   otherwise      fixed with two decimals
// buf must be large enough for the result; no length check is done.
void TSeqAlignment::PrintBitScore(char *buf, double bscore)
{
	if (bscore < 0.0)
	{
		sprintf(buf, "NA");
		return;
	}
	if (bscore > 9999.0)
	{
		sprintf(buf, "%.2e", bscore);
		return;
	}
	if (bscore > 99.9)
	{
		sprintf(buf, "%.0ld", (long)bscore);
		return;
	}
	sprintf(buf, "%.2f", bscore);
}


// Parallel tables naming the alignment sort criteria: literal and display
// label, in matching order.
const char * TSeqAlignment::dimLits[] = {"evalue", "bitscore", "seqidentity", "alignedlen", "scorecombo1"};
const char * TSeqAlignment::dimLabels[] = {"BLAST E-value", "BLAST bit score", "Sequence Identity", "Aligned Length", "Score Combination"};
// Creates a sort object that compares alignments by the criterion selected
// with iSortIdx. Any value other than the four handled below keeps the
// default, comparison by E-value.
TSeqAlignment::TSortObj::TSortObj(int iSortIdx):
	m_lpfnCompare(TSeqAlignment::EValueCompare)
{
	if (TSeqAlignment::SORT_BY_BITSCORE == iSortIdx)
	{
		m_lpfnCompare = TSeqAlignment::BitScoreCompare;
	}
	else if (TSeqAlignment::SORT_BY_SEQ_IDENTITY == iSortIdx)
	{
		m_lpfnCompare = TSeqAlignment::SeqIdentityCompare;
	}
	else if (TSeqAlignment::SORT_BY_ALIGNED_LENGTH == iSortIdx)
	{
		m_lpfnCompare = TSeqAlignment::AlignedLengthCompare;
	}
	else if (TSeqAlignment::SORT_BY_SCORE_COMBO == iSortIdx)
	{
		m_lpfnCompare = TSeqAlignment::ScoreComboEvaluate;
	}
}



// Formats a percentage into buf as an integer followed by '%'.
// The value is obtained by adding 0.5 and truncating toward zero.
// buf must be large enough for the result; no length check is done.
void TSeqAlignment::PrintPercentage(char *buf, double pct)
{
	const int rounded = (int)(pct + 0.5);
	sprintf(buf, "%d%%", rounded);
}

// Returns the alignment to its empty state: zero scores and range, normal
// alignment type, plus strand, reading frame 0, no segments and an empty
// clip set.
void TSeqAlignment::Reset(void)
{
	// -- scores and statistics
	m_uiAlignedLen = 0;
	m_dAlignedPct = 0.0;
	m_iScore = 0.0;
	m_dEValue = 0.0;
	m_dBitScore = 0;
	m_iNumIdent = 0;
	m_dSeqIdentity = 0.0;

	// -- type, strand, frame and range
	m_eAlignType = eNormal;
	m_bIsMinus = false;
	m_ReadingFrame = 0;
	m_iFrom = 0;
	m_iTo = 0;

	// -- aligned segments
	m_vecMStarts.clear();
	m_vecSStarts.clear();
	m_vecLens.clear();
	m_ClipSet.Clear();
}

// Renders the aligned segments as text: the segment count followed, for each
// segment, by ",<master start>,<slave start>,<length>".
// NOTE(review): a '\0' character is streamed at the end, so the returned
// std::string carries one trailing null character inside its contents --
// confirm callers rely on this before removing it.
string TSeqAlignment::GetAlignString(void) const
{
	const size_t seg_count = m_vecLens.size();

	stringstream text;
	text << seg_count;
	for (size_t seg = 0; seg < seg_count; ++seg)
	{
		text << "," << m_vecMStarts[seg];
		text << "," << m_vecSStarts[seg];
		text << "," << m_vecLens[seg];
	}
	text << '\0';

	return text.str();
}

// -- This should be the final step after chain mapping: convert the coordinates on the
// -- "effective peptide" onto the original NA sequence.
// --
// -- Does nothing unless the alignment type is ePr2Na. Otherwise every segment of
// -- segset is rewritten in place and segset.m_iFactor is set to RF_SIZE (plus
// -- strand) or -RF_SIZE (minus strand).
// --
// -- m_ReadingFrame is read as a packed value: the bits above the lowest two hold
// -- the aligned protein length (non-zero only for the minus strand) and the low
// -- bits, masked with RF_SIZE, hold the frame index on the positive strand.
// -- NOTE(review): masking with RF_SIZE yields the low two bits only if RF_SIZE is 3 -- confirm.
void TSeqAlignment::Pr2NaConvert(CSegSet &segset) const
{
	if (ePr2Na == m_eAlignType)
	{
		//SeqPos_t (*lpfnPr2PlusNA)(SeqPos_t pr, READINGFRAME::TFRAMEINDEX rfidx, SeqLen_t na_len) = nullptr;

		SeqLen_t alnLen = (m_ReadingFrame >> 2);

		READINGFRAME::TFRAMEINDEX rfidx = m_ReadingFrame & READINGFRAME::RF_SIZE;	//reading frame at positive strand
		segset.m_iFactor = READINGFRAME::RF_SIZE;
		if (alnLen > 0)	//is minus
		{
			// -- alnLen is aligned pr len. Convert to na len
			alnLen = alnLen * READINGFRAME::RF_SIZE + rfidx;
			//rfidx = READINGFRAME::RF_SIZE;	//use alnLen, it's always readingframe -1, ie, rfidx=3
			//lpfnPr2PlusNA = &READINGFRAME::MinusPr2PlusNA;
			for (CSegSet::TSegs::iterator iterSeg = segset.m_lstContainer.begin(); iterSeg != segset.m_lstContainer.end(); ++iterSeg)
			{
				// -- on the minus strand the segment ends swap: the converted "from"
				// -- becomes the new "to", and the converted "to" gives the new "from"
				SeqPos_t newFrom = READINGFRAME::MinusPr2PlusNA(iterSeg->from, READINGFRAME::RF_SIZE, alnLen);
				iterSeg->from = READINGFRAME::MinusPr2PlusNA(iterSeg->to, READINGFRAME::RF_SIZE, alnLen) - READINGFRAME::RF_SIZE + 1;
				iterSeg->to = newFrom;

				// -- reverse ori_from as well: move it to the other end of the segment.
				// -- m_iFactor is still +RF_SIZE here; it is negated after the loop.
				iterSeg->ori_from += (iterSeg->to - iterSeg->from + 1) / segset.m_iFactor - 1;

			}
			segset.m_iFactor = -segset.m_iFactor;	//change direction
			segset.m_lstContainer.reverse();
		}
		else	//plus strand
		{
			for (CSegSet::TSegs::iterator iter = segset.m_lstContainer.begin(); iter != segset.m_lstContainer.end(); ++iter)
			{
				// -- "to" is extended by RF_SIZE - 1 so that it covers the whole last codon
				iter->from = READINGFRAME::PlusPr2PlusNA(iter->from, rfidx, 0);
				iter->to = READINGFRAME::PlusPr2PlusNA(iter->to, rfidx, 0) + READINGFRAME::RF_SIZE - 1;

			}
		}
	}
}

// -- The key function to track ori_from.
// -- Assumes mapping is always from protein to protein.
// --
// -- Maps the segments of segset, in place, through this alignment. Each segment is
// -- intersected with the aligned blocks (m_vecSStarts / m_vecLens); the parts that
// -- fall inside a block are shifted from the m_vecSStarts coordinate system to the
// -- m_vecMStarts coordinate system, and the parts that fall outside every block are
// -- dropped. Whenever a segment loses residues on its left side, ori_from is
// -- advanced by the same amount so that it keeps pointing at the original position
// -- of the segment's first remaining residue.
// --
// -- Both the segments and the aligned blocks are walked once, front to back
// -- (presumably both are sorted by position -- confirm against the callers).
// -- When doConvert is true, Pr2NaConvert is applied to the result.
void TSeqAlignment::MapSegSet(CSegSet &segset, bool doConvert) const
{
	if (segset.IsEmpty()) return;

	CSegSet::TSegs::iterator iter = segset.m_lstContainer.begin();
	size_t idx = 0;	//index of the current aligned block

	while (iter != segset.m_lstContainer.end())
	{
		// -- no blocks left, or the current block starts after this segment ends
		if (idx >= m_vecLens.size() || m_vecSStarts[idx] > iter->to)	// discard this seg
		{
			CSegSet::TSegs::iterator temp = iter;
			++iter;
			segset.m_lstContainer.erase(temp);
		}
		else
		{
			SeqPos_t diff = m_vecSStarts[idx] - m_vecMStarts[idx];	//shift between the two coordinate systems
			SeqPos_t end = m_vecSStarts[idx] + m_vecLens[idx] - 1;	//last position of the current block
			if (end >= iter->to)	//No truncation from right side
			{
				if (iter->from < m_vecSStarts[idx])	//segment shrunk from left. deal with ori_from
				{
					iter->ori_from += (m_vecSStarts[idx] - iter->from);// / segset.m_iFactor;
					iter->from = m_vecSStarts[idx];
				}

				if (end == iter->to)	//seg happens to end at aligned seg
					++idx;	//here is the chance to advance idx


				// -- mapping
				iter->from -= diff;
				iter->to -= diff;

				++iter;
			}
			else if (end >= iter->from)	//end < iter->to, truncate from right side
			{
				// -- the segment extends past the block: split it. "temp" is the part
				// -- inside the block; the remainder stays in *iter for the next round.
				CSegSet::TSeg temp(iter->from, end);
				temp.ori_from = iter->ori_from;

				if (temp.from < m_vecSStarts[idx])
				{
					temp.ori_from += (m_vecSStarts[idx] - temp.from);// / segset.m_iFactor;
					temp.from = m_vecSStarts[idx];
				}

				// -- mapping
				temp.from -= diff;
				temp.to -= diff;
				segset.m_lstContainer.emplace(iter, temp);	//inserted in front of the remainder

				// -- cut original seg for next round
				iter->ori_from += (end - iter->from + 1);// / segset.m_iFactor;
				iter->from = end + 1;
				//iter->ori_from += (iter->from) / segset.m_iFactor;
				++idx;
			}
			else	//end < iter->from, step to next denseg
				++idx;
		}
	}

	// -- check motif

	//if (eNa_strand_minus == m_eStrand) StrandConvert(segset);	//possible strand conversion
	// -- if protein to na, convert to na coord
	if (doConvert) Pr2NaConvert(segset);

}
// -- Expands already-mapped segments (protein coordinates) into one
// -- TResiduePos entry per residue, pairing the mapped position with the
// -- original position (ori_from + offset inside the segment).
// --
// -- mappedAAsegs  mapped segments, assumed to be in protein coordinates.
// -- qLen          length of the query sequence (na or aa).
// -- rMappedAAPos  output; cleared first, then filled segment by segment.
// --
// -- Returns the reading frame index taken from m_ReadingFrame. For a
// -- protein-to-na alignment with a non-zero aligned length stored in
// -- m_ReadingFrame (minus strand) the index is converted with
// -- PlusRFIdx2MinusRFIdx and every mapped position is shifted by
// -- (aaLen - alignedLen). Throws through THROW_SIMPLE when the aligned
// -- length exceeds the derived protein length.
READINGFRAME::TFRAMEINDEX TSeqAlignment::GetTranslatedPosMap(const CSegSet &mappedAAsegs, SeqLen_t qLen, vector<TSeg_base::TResiduePos> &rMappedAAPos) const
{
	rMappedAAPos.clear();

	const CSegSet::TSegs &segs = mappedAAsegs.GetSegs();
	size_t ttlres = mappedAAsegs.GetTotalResidues();
	rMappedAAPos.reserve(ttlres);

	READINGFRAME::TFRAMEINDEX rfidx = m_ReadingFrame & READINGFRAME::RF_SIZE;

	if (ePr2Na == m_eAlignType)
	{
		SeqLen_t alignedLen = m_ReadingFrame >> 2;
		// -- protein length is derived with the plus-side frame index, before any conversion
		SeqLen_t aaLen = (qLen - rfidx) / READINGFRAME::RF_SIZE;

		if (alignedLen > 0)	//minus strand. Plus strand will have 0 == alignedLen
		{
			if (alignedLen > aaLen)	//error
			{
				THROW_SIMPLE("Invalid protein length " << aaLen << ": shorter than aligned range " << alignedLen);
			}

			rfidx = READINGFRAME::PlusRFIdx2MinusRFIdx(rfidx, qLen);
			const SeqLen_t offset = aaLen - alignedLen;

			for (const auto &seg : segs)
			{
				for (SeqPos_t c = seg.from, inc = 0; c <= seg.to; ++c, ++inc)
					rMappedAAPos.emplace_back(c + offset, seg.ori_from + inc);
			}
			return rfidx;
		}
	}

	// -- either protein or plus strand translation, no adjust needed.
	for (const auto &seg : segs)
	{
		for (SeqPos_t c = seg.from, inc = 0; c <= seg.to; ++c, ++inc)
			rMappedAAPos.emplace_back(c, seg.ori_from + inc);
	}
	return rfidx;
}


// -- Rebuilds 'segset' with one segment per aligned block, using the
// -- slave-side start (m_vecSStarts) and the block length (m_vecLens).
void TSeqAlignment::CreateSlaveSegs(CSegSet &segset) const
{
	segset.Clear();

	const size_t blockCount = m_vecLens.size();
	size_t blk = 0;
	while (blk < blockCount)
	{
		const auto &first = m_vecSStarts[blk];
		segset.AddSeg(first, first + m_vecLens[blk] - 1);	// inclusive end
		++blk;
	}
}

// -- Rebuilds 'segset' with one segment per aligned block, using the
// -- master-side start (m_vecMStarts) and the block length (m_vecLens).
void TSeqAlignment::CreateMasterSegs(CSegSet &segset) const
{
	segset.Clear();

	const size_t blockCount = m_vecLens.size();
	size_t blk = 0;
	while (blk < blockCount)
	{
		const auto &first = m_vecMStarts[blk];
		segset.AddSeg(first, first + m_vecLens[blk] - 1);	// inclusive end
		++blk;
	}
}


// -- Collects into 'segset' the master-side gaps between consecutive aligned
// -- blocks whose length is at least 'gapThreshold'. 'segset' is cleared first.
void TSeqAlignment::CalcMasterGaps(SeqLen_t gapThreshold, CSegSet &segset) const
{
	segset.Clear();
	if (m_vecLens.empty())
		return;

	// -- walk neighbouring block pairs (cur, next)
	for (size_t next = 1, total = m_vecLens.size(); next < total; ++next)
	{
		const size_t cur = next - 1;

		SeqPos_t gapFirst = m_vecMStarts[cur] + (SeqPos_t)m_vecLens[cur];	// first position after block 'cur'
		SeqPos_t gapSize = m_vecMStarts[next] - gapFirst;

		if (gapSize < (SeqPos_t)gapThreshold)	//too short to count as a gap
			continue;

		segset.AddSeg(gapFirst, gapFirst + gapSize - 1);
	}
}

// -- Returns the reading frame index of this alignment.
// -- seqLen is the length of the query sequence, na or aa.
// -- Alignments that are not protein-to-na always yield 0.
READINGFRAME::TFRAMEINDEX TSeqAlignment::GetRFIdx(SeqLen_t seqLen) const
{
	if (ePr2Na == m_eAlignType)
	{
		// -- m_ReadingFrame always carries the positive side reading frame in its
		// -- low bits; the bits above hold an aligned length that is non-zero
		// -- only for the minus strand.
		READINGFRAME::TFRAMEINDEX frameIdx = m_ReadingFrame & READINGFRAME::RF_SIZE;
		const SeqLen_t minusAlignedLen = m_ReadingFrame >> 2;

		if (minusAlignedLen > 0)	//negative -- minus strand
			frameIdx = READINGFRAME::PlusRFIdx2MinusRFIdx(frameIdx, seqLen);
		return frameIdx;
	}
	return 0;
}

// -- Adds the aligned segments of this alignment to 'pobj'.
// -- The slave segments are mapped with MapSegSet and written as an array of
// -- {from, to, ori_from, ori_to} objects under both _PROP_SEGS and
// -- _PROP_REALSEGS. For protein-to-na alignments the segments are first
// -- written under _PROP_VPSEGS, then converted with Pr2NaConvert before the
// -- segs / realsegs array is produced.
void TSeqAlignment::AddSegs(LJSON::JSVar &pobj) const
{
	CSegSet dst;
	CreateSlaveSegs(dst);
	MapSegSet(dst);

	// -- serialises the current content of dst into the given JSON array
	auto appendSegs = [&dst](JSVar &target)
	{
		const CSegSet::TSegs &all = dst.GetSegs();
		for (CSegSet::TSegs::const_iterator cur = all.begin(), stop = all.end(); stop != cur; ++cur)
		{
			JSVar entry(eObject);
			entry[_PROP_FROM] = cur->from;
			entry[_PROP_TO] = cur->to;
			entry[_PROP_ORIFROM] = cur->ori_from;
			entry[_PROP_ORITO] = dst.GetOriTo(cur);
			target.push(entry);
		}
	};

	if (ePr2Na == m_eAlignType)
	{
		// -- segments before na conversion
		JSVar vpsegs(eArray);
		appendSegs(vpsegs);
		pobj[_PROP_VPSEGS] = vpsegs;

		Pr2NaConvert(dst);
	}

	JSVar segs(eArray);
	appendSegs(segs);
	pobj[_PROP_SEGS] = segs;
	pobj[_PROP_REALSEGS] = segs;
}

// -- Writes the alignment statistics, the alignment type, the optional
// -- from/to range and finally the segments (via AddSegs) into 'dsjson'.
// -- The minus-strand flag is only written for protein-to-na alignments;
// -- from/to are only written when both are non-zero.
void TSeqAlignment::AmendJson(JSVar dsjson) const
{
	const bool isTranslated = (ePr2Na == m_eAlignType);
	const bool hasRange = !(0 == m_iFrom || 0 == m_iTo);

	// -- scores and statistics
	dsjson[_PROP_EVALUE] = m_dEValue;
	dsjson[_PROP_BITSCORE] = m_dBitScore;
	dsjson[_PROP_ALNSCORE] = m_iScore;
	dsjson[_PROP_ALNPCT] = m_dAlignedPct;
	dsjson[_PROP_ALNNUMID] = m_iNumIdent;
	dsjson[_PROP_ALNIDT] = m_dSeqIdentity;
	dsjson[_PROP_ALNLEN] = m_uiAlignedLen;
	dsjson[_PROP_TYPE] = m_eAlignType;

	if (isTranslated)
		dsjson[_PROP_ALNISMINUS] = m_bIsMinus;

	if (hasRange)
	{
		dsjson[_PROP_FROM] = m_iFrom;
		dsjson[_PROP_TO] = m_iTo;
	}

	// -- add segs
	AddSegs(dsjson);
}
	

// -- Returns a new JSON object filled by AmendJson.
JSVar TSeqAlignment::CreateJson(void) const
{
	JSVar result(eObject);

	AmendJson(result);
	return result;
}

// -- Writes this alignment to the data cache 'dc': first the scalar members,
// -- then the three block vectors (master starts, slave starts, lengths) and
// -- finally the clip set. RestoreFromCache reads the fields back in the same
// -- order, so the two functions must be kept in sync.
void TSeqAlignment::SaveToCache(CDataCache &dc) const
{
    dc << m_uiAlignedLen << m_dAlignedPct << m_iScore << m_dEValue
       << m_dBitScore << m_iNumIdent << m_dSeqIdentity << m_eAlignType
       << m_bIsMinus << m_ReadingFrame << m_iFrom << m_iTo;
    CachePODData(dc, m_vecMStarts);
    CachePODData(dc, m_vecSStarts);
    CachePODData(dc, m_vecLens);
    m_ClipSet.SaveToCache(dc);
    
}

// -- Reads this alignment back from the data cache 'dc'. The fields are read
// -- in exactly the order SaveToCache writes them: scalar members, the three
// -- block vectors, then the clip set.
void TSeqAlignment::RestoreFromCache(CDataCache &dc)
{
    dc >> m_uiAlignedLen >> m_dAlignedPct >> m_iScore >> m_dEValue
        >> m_dBitScore >> m_iNumIdent >> m_dSeqIdentity >> m_eAlignType
        >> m_bIsMinus >> m_ReadingFrame >> m_iFrom >> m_iTo;
        
    RestorePODData(dc, m_vecMStarts);
    RestorePODData(dc, m_vecSStarts);
    RestorePODData(dc, m_vecLens);
    m_ClipSet.RestoreFromCache(dc);
}



//void CleanAlignment(const vector<SeqPos_t> & rSrcStarts, const vector<SeqLen_t> & rSrcLens, vector<SeqPos_t>& rMStarts, vector<SeqPos_t>& rSStarts, vector<SeqLen_t>& rLens)
//{
//	rMStarts.clear();
//	rSStarts.clear();
//	rLens.clear();
//	for (size_t i = 0; i < rSrcLens.size(); ++i)
//	{
//		size_t ii = i + i;
//		if (rSrcStarts[ii] >=0 && rSrcStarts[ii + 1] >= 0)
//		{
//			rMStarts.push_back(rSrcStarts[ii]);
//			rSStarts.push_back(rSrcStarts[ii + 1]);
//			rLens.push_back(rSrcLens[i]);
//		}
//	}
//}


// -- This adds display segs. The segs written by TSeqAlignment::AddSegs stay
// -- available under _PROP_REALSEGS; _PROP_SEGS is replaced here by a coarser
// -- list in which neighbouring real segs are merged unless they are
// -- separated by a gap reported by CSegSet::GetGaps (threshold
// -- TDomSeqAlignment::GAP_THRESHOLD).
// --
// -- pobj  JSON object receiving the properties.
// --
// -- An alignment that yields no mapped segment produces an empty
// -- _PROP_SEGS array.
void TDomSeqAlignment::AddSegs(JSVar &pobj) const
{
	this->TSeqAlignment::AddSegs(pobj);

	// -- recreate segs. The array is attached before it is filled, as elsewhere in this file.
	JSVar segs(eArray);
	pobj[_PROP_SEGS] = segs;

	CSegSet dst;
	// -- slave segs are used instead of master segs so that ori_from is tracked by MapSegSet
	CreateSlaveSegs(dst);
	MapSegSet(dst, true);

	dst.m_ulGapThreshold = TDomSeqAlignment::GAP_THRESHOLD;

	CSegSet gaps;
	dst.GetGaps(gaps);

	const CSegSet::TSegs & rSegs = dst.GetSegs(), &rGapSegs = gaps.GetSegs();

	CSegSet::TSegs::const_iterator iter = rSegs.begin(), iterEnd = rSegs.end(), iterGap = rGapSegs.begin(), iterGapEnd = rGapSegs.end();

	// -- nothing mapped: stepping past / dereferencing end() would be undefined behaviour
	if (iterEnd == iter)
		return;

	// -- emits one display seg covering the real segs [first, last]
	auto pushMerged = [&segs, &dst](CSegSet::TSegs::const_iterator first, CSegSet::TSegs::const_iterator last)
	{
		JSVar aSeg(eObject);
		aSeg[_PROP_FROM] = first->from;
		aSeg[_PROP_TO] = last->to;
		aSeg[_PROP_ORIFROM] = first->ori_from;
		aSeg[_PROP_ORITO] = dst.GetOriTo(last);
		segs.push(aSeg);
	};

	// -- iter0 / iter1: first and last real seg of the display seg being built
	CSegSet::TSegs::const_iterator iter0 = iter, iter1 = iter0;

	for (++iter; iterEnd != iter; ++iter)	//first seg always in
	{
		if (iterGapEnd != iterGap && iterGap->to < iter->from)	//gap reached. Push in last segment
		{
			pushMerged(iter0, iter1);
			iter0 = iter;
			++iterGap;
		}
		iter1 = iter;
	}

	// -- last seg
	pushMerged(iter0, iter1);
}

// -- Writes the base alignment properties (TSeqAlignment::AmendJson) followed
// -- by the domain specific ones. The N- and C-terminal truncation flags are
// -- set when more than 0.2 is missing at that end; the lifted and suppressed
// -- flags are only written when true.
void TDomSeqAlignment::AmendJson(JSVar dsjson) const
{
	TSeqAlignment::AmendJson(dsjson);

	const bool nTermTruncated = (m_dNMissing > 0.2);
	const bool cTermTruncated = (m_dCMissing > 0.2);

	dsjson[_PROP_PSSMID] = m_uiPssmId;
	dsjson[_PROP_REGION] = m_iRegionIdx;
	dsjson[_PROP_ISSPEC] = IsSpecific();
	dsjson[_PROP_ISREP] = m_bRep;
	dsjson[_PROP_TRUNC_N] = nTermTruncated;
	dsjson[_PROP_TRUNC_C] = cTermTruncated;

	if (m_bLifted)
	{
		dsjson[_PROP_ISLIFTED] = true;
	}
	if (m_bSuppressed)
	{
		dsjson[_PROP_ISSUPPRESSED] = true;
	}
}

// -- Returns a new JSON object filled by AmendJson.
JSVar TDomSeqAlignment::CreateJson(void) const
{
	JSVar result(eObject);

	AmendJson(result);
	return result;
}

// void TDomSeqAlignment::AddAlignedSegs(JSVar & pobj) const
// {
    // JSVar segs(eArray), realsegs(eArray);
	// pobj[_PROP_SEGS] = segs;
	// pobj[_PROP_REALSEGS] = realsegs;

	// CSegSet dst;
	// //pAlign->CreateMasterSegs(dst);
	// CreateSlaveSegs(dst);

	// MapSegSet(dst, true);

	// dst.m_ulGapThreshold = TDomSeqAlignment::GAP_THRESHOLD;

	// CSegSet gaps;
	// dst.GetGaps(gaps);


	// const CSegSet::TSegs & rSegs = dst.GetSegs(), &rGapSegs = gaps.GetSegs();

	// CSegSet::TSegs::const_iterator iter = rSegs.begin(), iterEnd = rSegs.end(), iterGap = rGapSegs.begin(), iterGapEnd = rGapSegs.end();
	// CSegSet::TSegs::const_iterator iter0 = iter, iter1 = iter0;

	// {{
		// JSVar aRealSeg(eObject);
		// aRealSeg[_PROP_FROM] = iter->from;
		// aRealSeg[_PROP_TO] = iter->to;
		// aRealSeg[_PROP_ORIFROM] = iter->ori_from;
		// aRealSeg[_PROP_ORITO] = dst.GetOriTo(iter);
		// realsegs.push(aRealSeg);
	// }}

	// ++iter;	//first seg always in
	// while (iterEnd != iter)
	// {
		// JSVar aRealSeg(eObject);
		// aRealSeg[_PROP_FROM] = iter->from;
		// aRealSeg[_PROP_TO] = iter->to;
		// aRealSeg[_PROP_ORIFROM] = iter->ori_from;
		// aRealSeg[_PROP_ORITO] = dst.GetOriTo(iter);
		// realsegs.push(aRealSeg);


		// if (iterGapEnd != iterGap)
		// {
			// if (iterGap->to < iter->from)	//gap reached. Push in last segment
			// {
				// JSVar aSeg(eObject);
				// aSeg[_PROP_FROM] = iter0->from;
				// aSeg[_PROP_TO] = iter1->to;
				// aSeg[_PROP_ORIFROM] = iter0->ori_from;
				// aSeg[_PROP_ORITO] = dst.GetOriTo(iter1);
				// segs.push(aSeg);

				// iter0 = iter;
				// ++iterGap;
			// }
		// }

		// iter1 = iter;
		// ++iter;
	// }

	// // -- last seg
	// JSVar aSeg(eObject);
	// aSeg[_PROP_FROM] = iter0->from;
	// aSeg[_PROP_TO] = iter1->to;
	// aSeg[_PROP_ORIFROM] = iter0->ori_from;
	// aSeg[_PROP_ORITO] = dst.GetOriTo(iter1);
	// segs.push(aSeg);
// }

// -- Returns a new JSON object that carries only the properties written by
// -- AddSegs.
JSVar TDomSeqAlignment::CreateAlignedSegs(void) const
{
	JSVar alignedSegs(eObject);

	AddSegs(alignedSegs);
	return alignedSegs;
}

// -- Writes the base alignment (TSeqAlignment::SaveToCache) followed by the
// -- domain specific members. RestoreFromCache reads them back in the same
// -- order, so the two functions must be kept in sync.
void TDomSeqAlignment::SaveToCache(CDataCache &dc) const
{
    TSeqAlignment::SaveToCache(dc);
    dc << m_uiPssmId << m_iRegionIdx << m_dNMissing
        << m_dCMissing << m_bSpecQualified << m_iRepClass << m_bRep
        << m_bLifted << m_bSuppressed;
}

// -- Reads the base alignment (TSeqAlignment::RestoreFromCache) followed by
// -- the domain specific members, in the order SaveToCache writes them.
void TDomSeqAlignment::RestoreFromCache(CDataCache &dc)
{
    TSeqAlignment::RestoreFromCache(dc);
    dc >> m_uiPssmId >> m_iRegionIdx >> m_dNMissing
        >> m_dCMissing >> m_bSpecQualified >> m_iRepClass >> m_bRep
        >> m_bLifted >> m_bSuppressed;
}

int MapCdFeature(const TDomSite &rFeat, const TDomSeqAlignment &rAlign, SeqLen_t qLen, const string *dimAaData, CSegSet &dst)
{
	dst = rFeat;

	if (!rAlign.m_ClipSet.IsEmpty())
		dst.Cross(rAlign.m_ClipSet);

	rAlign.MapSegSet(dst, false);

	vector<TSeg_base::TResiduePos> vecMappedPos;
	int rfidx = rAlign.GetTranslatedPosMap(dst, qLen, vecMappedPos);

	if (rFeat.MotifCheck(vecMappedPos, dimAaData[rfidx]) > 0)
		dst.Clear();
	return rfidx;
}


// -- Writes two parallel arrays into 'dsjson': _PROP_COORDS holds every
// -- position covered by m_segs, _PROP_ORICOORDS the value of
// -- m_segs.GetOriTo() for the same position.
void ComputedFeatLoc::AddCoords(JSVar dsjson) const
{
	JSVar coords(eArray), oriCoords(eArray);
	// -- arrays are attached first and filled afterwards
	dsjson[_PROP_COORDS] = coords;
	dsjson[_PROP_ORICOORDS] = oriCoords;

	const CSegSet::TSegs &segList = m_segs.GetSegs();
	CSegSet::TSegs::const_iterator seg = segList.begin();
	const CSegSet::TSegs::const_iterator segStop = segList.end();

	while (segStop != seg)
	{
		SeqPos_t pos = seg->from;
		while (pos <= seg->to)
		{
			coords.push(pos);
			oriCoords.push(m_segs.GetOriTo(seg, pos));
			++pos;
		}
		++seg;
	}
}

// -- Writes the description of this computed feature into 'dsjson'.
// -- Specificity, cluster id and source domain pssm id are only written for
// -- sites that are not structure motifs; the site's own properties and the
// -- completeness are always written.
void ComputedFeatLoc::AmendJson(JSVar dsjson) const
{
	const bool isStructMotif = (TDomSite::eType_StructMotif == m_site->m_iType);

	if (!isStructMotif)
	{
		dsjson[_PROP_ISSPEC] = m_isspec;
		dsjson[_PROP_CLID] = m_clst;
		dsjson[_PROP_SRCDOM] = m_srcdom->m_uiPssmId;
	}

	m_site->AmendJson(dsjson);
	dsjson[_PROP_COMPLETENESS] = m_completeness;
}

// -- Returns a new JSON object holding the feature description (AmendJson)
// -- followed by its coordinate arrays (AddCoords).
JSVar ComputedFeatLoc::CreateJson(void) const
{
	JSVar result(eObject);

	AmendJson(result);
	AddCoords(result);

	return result;
}



void TDomSeqAlignIndex::CreateRecordSets(const vector<TDomSeqAlignment> &rAlignments, const TDomClusterIndexIfx & rDomInfo, vector< TDomSeqAlignIndex::__TCdAlignRecord> &rDomAligns, vector<TDomSeqAlignIndex::__TCdAlignRecord> &rFeatAligns, int mode) const
{
//	const vector<size_t> &rIndice = bConcise ? m_vecConciseIndice : m_vecSortedIndice;
	rDomAligns.clear();
	rFeatAligns.clear();

	size_t featIdx0 = 0;
	size_t amendCount = 0;
	size_t ccsBase = m_vecConciseIndice.size();

	if (TDataModes::e_rep == mode)	//all rep
	{

		for (size_t iidx = 0, iidxEnd = m_vecConciseIndice.size(); iidx < iidxEnd; ++iidx)
		{
			__TCdAlignRecord alignRec;
			alignRec.pAlign = &(rAlignments[m_vecConciseIndice[iidx]]);
			alignRec.pCdInfo = rDomInfo.FindCdInfo(alignRec.pAlign->m_uiPssmId);


			if (alignRec.pCdInfo->m_iClusterId > 0) alignRec.pClst = rDomInfo.FindClusterInfo(alignRec.pCdInfo->m_iClusterId);

			if (alignRec.pCdInfo->m_uiHierarchyRoot > 0)
			{
				if (alignRec.pCdInfo->m_uiHierarchyRoot == alignRec.pAlign->m_uiPssmId) alignRec.pRootCdInfo = alignRec.pCdInfo;
				else alignRec.pRootCdInfo = rDomInfo.FindCdInfo(alignRec.pCdInfo->m_uiHierarchyRoot);
			}
			alignRec.idx = m_vecConciseIndice[iidx];
			alignRec.idxidx = iidx;

			if (0 == alignRec.pAlign->m_iRepClass && alignRec.pAlign->m_bRep)	//non-multi
			{
				ExtractFeatAligns(alignRec, rAlignments, rDomInfo, rFeatAligns);

				size_t featIdx1 = rFeatAligns.size();

				for (size_t i = featIdx0; i < featIdx1; ++i)
				{
					if (rFeatAligns[i].idx == alignRec.idx)
						rFeatAligns[i].idxidx = alignRec.idxidx;
					else
						rFeatAligns[i].idxidx = iidxEnd + amendCount++;
				}

				featIdx0 = featIdx1;
			}

			rDomAligns.push_back(alignRec);
		}
	}
	else	//non-concise
	{
		size_t repIdx = 0;

		size_t featIdx0 = 0;
		size_t amendCount = 0;

		const vector<size_t> &nonConciseIdx = (TDataModes::e_std == mode ? m_vecStdIndice : m_vecSortedIndice);

		for (size_t iidx = 0, iidxEnd = nonConciseIdx.size(); iidx < iidxEnd; ++iidx)
		{
			__TCdAlignRecord alignRec;
			alignRec.pAlign = &(rAlignments[nonConciseIdx[iidx]]);
			alignRec.pCdInfo = rDomInfo.FindCdInfo(alignRec.pAlign->m_uiPssmId);
			if (alignRec.pCdInfo->m_iClusterId > 0) alignRec.pClst = rDomInfo.FindClusterInfo(alignRec.pCdInfo->m_iClusterId);
			if (alignRec.pCdInfo->m_uiHierarchyRoot > 0)
			{
				if (alignRec.pCdInfo->m_uiHierarchyRoot == alignRec.pAlign->m_uiPssmId) alignRec.pRootCdInfo = alignRec.pCdInfo;
				else alignRec.pRootCdInfo = rDomInfo.FindCdInfo(alignRec.pCdInfo->m_uiHierarchyRoot);
			}
			alignRec.idx = nonConciseIdx[iidx];
			alignRec.idxidx = -1;
			if (alignRec.pAlign->m_bRep)
			{
				alignRec.idxidx = repIdx++;
				if (0 == alignRec.pAlign->m_iRepClass)	//non-multi
				{
					ExtractFeatAligns(alignRec, rAlignments, rDomInfo, rFeatAligns);
					size_t featIdx1 = rFeatAligns.size();

					for (size_t i = featIdx0; i < featIdx1; ++i)
					{
						if (rFeatAligns[i].idx == alignRec.idx)
							rFeatAligns[i].idxidx = alignRec.idxidx;
						else
							rFeatAligns[i].idxidx = ccsBase + amendCount++;
					}
					featIdx0 = featIdx1;
				}
			}
			rDomAligns.push_back(alignRec);
		}
	}


	// -- added 9/9/2014 handling structure motifs -- attach to
	for (size_t iidx = 0, iidxEnd = m_vecSDIndice.size(); iidx < iidxEnd; ++iidx)
	{
		__TCdAlignRecord rec;

		rec.idx = m_vecSDIndice[iidx];

		rec.pAlign =  &(rAlignments[rec.idx]);

		rec.pCdInfo = rDomInfo.FindCdInfo(rec.pAlign->m_uiPssmId);

		rec.pClst = nullptr;

		rec.pRootCdInfo = nullptr;

		rec.idxidx = ccsBase + amendCount++;

		rFeatAligns.push_back(rec);
	}
}

void TDomSeqAlignIndex::CreateConciseAmends(const vector<TDomSeqAlignment> &rAlignments, const TDomClusterIndexIfx & rDomInfo, const vector<TDomSeqAlignIndex::__TCdAlignRecord> &rConciseAligns, vector<TDomSeqAlignIndex::__TCdAlignRecord> &rAmendAligns) const
{
	rAmendAligns.clear();
	for (size_t ics = 0, icsend = rConciseAligns.size(); ics < icsend; ++ics)
	{
		const __TCdAlignRecord &csrec = rConciseAligns[ics];
		if (0 == csrec.pAlign->m_iRepClass && !csrec.pCdInfo->m_bCurated)	//monodom non-curated
		{
			__TCdAlignRecord arec;

			for (size_t iiamd = 0, iiamdend = m_vecStdIndice.size(); iiamd < iiamdend; ++iiamd)
			{
				arec.pAlign = &(rAlignments[m_vecStdIndice[iiamd]]);
				if (0 == arec.pAlign->m_iRepClass && arec.pAlign->m_iRegionIdx == csrec.pAlign->m_iRegionIdx && arec.pAlign != csrec.pAlign)
				{
					arec.pCdInfo = rDomInfo.FindCdInfo(arec.pAlign->m_uiPssmId);
					if (nullptr != arec.pCdInfo && arec.pCdInfo->m_bCurated)
					{
						if (arec.pAlign->m_bSpecQualified || !csrec.pAlign->m_bSpecQualified)
						{
							arec.pClst = rDomInfo.FindClusterInfo(arec.pCdInfo->m_iClusterId);
							if (arec.pCdInfo->m_uiHierarchyRoot > 0) arec.pRootCdInfo = rDomInfo.FindCdInfo(arec.pCdInfo->m_uiHierarchyRoot);
							rAmendAligns.emplace_back(arec);
							goto labelNextHit;
						}
					}
				}
			}
		}
	labelNextHit:;
	}
}

// -- Computes the mapped feature locations for the representative (concise)
// -- alignments and for the structure-domain alignments.
// --
// -- sites        output, cleared first. Mapped regular sites.
// -- motifs       output, cleared first. Mapped structure motifs.
// -- rAlignments  all domain alignments; the index vectors of this object refer into it.
// -- rDomInfo     lookup for domain information.
// -- seqLen       query sequence length, forwarded to GetTranslatedPosMap.
// -- translated   sequence that MotifCheck is run against.
// --
// -- A feature is kept only when EvaluateCompleteness reports at least 0.8 and,
// -- for regular sites, MotifCheck does not return a positive value.
// -- The stored ComputedFeatLoc keeps pointers to the feature (m_site) and its
// -- source domain (m_srcdom); it does not copy them.
void TDomSeqAlignIndex::CreateComputedFeatLocs(vector<ComputedFeatLoc> &sites, vector<ComputedFeatLoc> &motifs, const vector<TDomSeqAlignment> &rAlignments, const TDomClusterIndexIfx & rDomInfo, size_t seqLen, const string  &translated) const
{
    // -- local copy of the qualified feature alignment indices
    vector<size_t> featIndices(m_vecQualifiedFeatIndice.begin(), m_vecQualifiedFeatIndice.end());
    
    sites.clear();
    sites.reserve(m_vecQualifiedFeatIndice.size());
    
    motifs.clear();
    motifs.reserve(m_vecSDIndice.size());
    
    // -- points at either 'sites' or 'motifs', chosen per feature below
    vector<ComputedFeatLoc> *dst = nullptr;
    
    for (size_t iidx = 0, iidxEnd = m_vecConciseIndice.size(); iidx < iidxEnd; ++iidx)
    {
        size_t repIdx = m_vecConciseIndice[iidx];
        const TDomSeqAlignment *align = &(rAlignments[repIdx]);
        // -- look for qualified feat align
        
        for (size_t fiidx = 0, ttlFeatIdx = featIndices.size(); fiidx < ttlFeatIdx; ++fiidx)
        {
            size_t fIdx = featIndices[fiidx];
            
            // -- start from the representative itself; replaced below when fIdx is another alignment
            const TDomSeqAlignment *falign = align;
            
            // -- NOTE(review): the result of FindCdInfo is dereferenced below without a null check
            const TDomain *alnDom = rDomInfo.FindCdInfo(falign->m_uiPssmId);
            
            if (fIdx != repIdx)
            {
                // -- a different alignment only qualifies when its domain is in the representative's cluster
                const TDomain *repDom = alnDom;
                falign = &(rAlignments[fIdx]);
                alnDom =  rDomInfo.FindCdInfo(falign->m_uiPssmId);
                
                if (alnDom->m_iClusterId != repDom->m_iClusterId)   //not qualified
                    continue;
            }
            
            //qualify
            
            bool is_spec = falign->IsSpecific();
            
            // -- specific hits use the specific features of their own domain
            const list<TDomSite> *pFeatList = &(alnDom->m_lstSpecificFeatures);
            const TDomain *srcdom = alnDom;
            
            if (!is_spec)
            {
                // -- non-specific hits use the generic features; the source domain
                // -- becomes the hierarchy root when one is set and can be found
                pFeatList = &(alnDom->m_lstGenericFeatures);
                if (alnDom->m_uiHierarchyRoot)
                {
                    const TDomain * rdom = rDomInfo.FindCdInfo(alnDom->m_uiHierarchyRoot);
                    if (nullptr != rdom)
                        srcdom = rdom;
                }
            }
            
            double completeness = 0.0;
            for (const auto & f : *pFeatList)
            {
                CSegSet mapped_segs(f);
                
                // -- restrict the feature to the clip set of the alignment, if any
                if (!falign->m_ClipSet.IsEmpty())
                    mapped_segs.Cross(falign->m_ClipSet);
                
                if (TDomSite::eType_StructMotif != f.m_iType)   //regular site
                {
                    // -- map without na conversion first: completeness and motif check run on these coordinates
                    falign->MapSegSet(mapped_segs, false);
                    
                    if ((completeness = f.EvaluateCompleteness(mapped_segs)) < 0.8)
                        continue;
                    
                    vector<TSeg_base::TResiduePos> vecMappedPos;
                    falign->GetTranslatedPosMap(mapped_segs, seqLen, vecMappedPos);
                    if (f.MotifCheck(vecMappedPos, translated) > 0)
                        continue;
                    
                    // -- accepted: now apply the conversion that MapSegSet skipped
                    falign->Pr2NaConvert(mapped_segs);
                    dst = &sites;
                }
                else if (is_spec)   //qualified motif
                {
                    falign->MapSegSet(mapped_segs, true);
                    if ((completeness = f.EvaluateCompleteness(mapped_segs)) < 0.8)
                        continue;
                    
                    dst = &motifs;
                }
                else
                    continue;   // structure motif on a non-specific hit: ignored
                
                dst->emplace_back();
                ComputedFeatLoc &target = dst->back();
                
                target.m_segs = move(mapped_segs);
                target.m_completeness = completeness;
                target.m_isspec = is_spec;
                target.m_clst = alnDom->m_iClusterId;
                
                target.m_site = &f;
                target.m_srcdom = srcdom;
            }
            
        }
    }
    
    // -- structure domains, only motifs
    for (size_t iidx = 0, iidxEnd = m_vecSDIndice.size(); iidx < iidxEnd; ++iidx)
    {
        size_t mIdx = m_vecSDIndice[iidx];
        const TDomSeqAlignment &align = rAlignments[mIdx];
        
        // -- only specific hits contribute
        if (align.IsSpecific())
        {
            const TDomain * sd = rDomInfo.FindCdInfo(align.m_uiPssmId);
            if (nullptr != sd)
            {
                for (const auto & f: sd->m_lstSpecificFeatures)
                {
                    CSegSet mapped_segs(f);
                    if (!align.m_ClipSet.IsEmpty())
                        mapped_segs.Cross(align.m_ClipSet);
                    
                    align.MapSegSet(mapped_segs, true);
                    double completeness = f.EvaluateCompleteness(mapped_segs);
                    if (completeness >= 0.8)
                    {
                        motifs.emplace_back();
                        ComputedFeatLoc &target = motifs.back();
                        
                        target.m_segs = move(mapped_segs);
                        target.m_completeness = completeness;
                        target.m_isspec = true;
                        target.m_clst = 0;   // these records carry cluster id 0
                        target.m_site = &f;
                        target.m_srcdom = sd;
                    }
                    
                }
            }
        }
    }
}

// -- Appends to rResult one record for every qualified feature alignment
// -- (m_vecQualifiedFeatIndice) that lies in the same region as the
// -- representative record rRepRec and is not already present in rResult.
// --
// -- When the representative's domain has a cluster id (> 0) the feature
// -- alignment's domain must belong to the same cluster and the record reuses
// -- the representative's cluster pointer. Otherwise only the region has to
// -- match -- which should not happen -- and the cluster is looked up from the
// -- feature's own domain.
// --
// -- pRootCdInfo is reset for every record, so a domain without a hierarchy
// -- root never inherits the root pointer of a previously appended record.
// -- idxidx is not set here; the caller assigns it.
// --
// -- NOTE(review): the result of FindCdInfo is dereferenced without a null check.
void TDomSeqAlignIndex :: ExtractFeatAligns(const TDomSeqAlignIndex::__TCdAlignRecord &rRepRec, const vector<TDomSeqAlignment> &rAlignments, const TDomClusterIndexIfx & rDomInfo, vector<TDomSeqAlignIndex::__TCdAlignRecord> &rResult) const
{
	const bool matchCluster = (rRepRec.pCdInfo->m_iClusterId > 0);	//has cluster, do cluster match

	// -- one domain may have multiple features: tells whether the alignment was recorded already
	auto alreadyListed = [&rResult](const TDomSeqAlignment *pAlign) -> bool
	{
		for (const auto & r : rResult)
		{
			if (r.pAlign == pAlign)
				return true;
		}
		return false;
	};

	__TCdAlignRecord rec;
	for (size_t fiidx = 0, fiidxEnd = m_vecQualifiedFeatIndice.size(); fiidx < fiidxEnd; ++fiidx)
	{
		rec.pAlign = &(rAlignments[m_vecQualifiedFeatIndice[fiidx]]);

		if (alreadyListed(rec.pAlign))
			continue;

		if (rec.pAlign->m_iRegionIdx != rRepRec.pAlign->m_iRegionIdx)	//region mismatch
			continue;

		rec.pCdInfo = rDomInfo.FindCdInfo(rec.pAlign->m_uiPssmId);

		if (matchCluster)
		{
			if (rec.pCdInfo->m_iClusterId != rRepRec.pCdInfo->m_iClusterId)	//different cluster
				continue;
			rec.pClst = rRepRec.pClst;
		}
		else
		{
			rec.pClst = rDomInfo.FindClusterInfo(rec.pCdInfo->m_iClusterId);
		}

		// -- 'rec' is reused across iterations: clear the root left over from an earlier record
		rec.pRootCdInfo = nullptr;
		if (rec.pCdInfo->m_uiHierarchyRoot > 0)
		{
			if (rec.pCdInfo->m_uiHierarchyRoot == rec.pAlign->m_uiPssmId) rec.pRootCdInfo = rec.pCdInfo;
			else rec.pRootCdInfo = rDomInfo.FindCdInfo(rec.pCdInfo->m_uiHierarchyRoot);
		}

		rec.idx = m_vecQualifiedFeatIndice[fiidx];
		rResult.push_back(rec);
	}
}


// -- Returns a JSON object with two parallel arrays -- _PROP_COORDS (every
// -- position covered by the segments of this hit) and _PROP_ORICOORDS (the
// -- GetOriTo() value for the same position) -- plus _PROP_COMPLETENESS, the
// -- number of covered positions divided by _site->m_iCompleteSize.
JSVar TDomSeqAlignIndex::__TComputedFeatHit::CreateSiteLocArray(void) const
{
	JSVar mapArrays(eObject), coords(eArray), oriCoords(eArray);
	// -- arrays are attached first and filled afterwards
	mapArrays[_PROP_COORDS] = coords;
	mapArrays[_PROP_ORICOORDS] = oriCoords;

	int residues = 0;	// number of positions emitted

	const CSegSet::TSegs &segList = GetSegs();
	CSegSet::TSegs::const_iterator seg = segList.begin();
	const CSegSet::TSegs::const_iterator segStop = segList.end();

	while (segStop != seg)
	{
		SeqPos_t pos = seg->from;
		while (pos <= seg->to)
		{
			coords.push(pos);
			++residues;
			oriCoords.push(GetOriTo(seg, pos));
			++pos;
		}
		++seg;
	}

	const double covered = (double)(residues);
	const double expected = (double)(_site->m_iCompleteSize);
	mapArrays[_PROP_COMPLETENESS] = covered / expected;

	return mapArrays;
}

// -- Returns the index vector that belongs to the given display mode:
// -- the sorted index for e_full, the standard index for e_std and the
// -- concise index for every other value.
const vector<size_t> & TDomSeqAlignIndex::x_GetModeIndex(int mode) const
{
	if (TDataModes::e_full == mode)
		return m_vecSortedIndice;

	if (TDataModes::e_std == mode)
		return m_vecStdIndice;

	return m_vecConciseIndice;
}

// -- Writes the four index vectors into 'pobj' as JSON arrays: the concise,
// -- standard and sorted indices under the literals of their display modes
// -- (TDataModes::dimLits) and the structure-domain index under STRUCTSIG.
void TDomSeqAlignIndex::AddIndices(JSVar &pobj) const
{
	// -- appends every entry of 'src' to the JSON array 'arr'
	auto fill = [](JSVar &arr, const vector<size_t> &src)
	{
		for (auto v : src)
			arr.push(JSVar(v));
	};

	JSVar repArr(eArray);
	fill(repArr, m_vecConciseIndice);
	pobj[TDataModes::dimLits[TDataModes::e_rep]] = repArr;

	JSVar stdArr(eArray);
	fill(stdArr, m_vecStdIndice);
	pobj[TDataModes::dimLits[TDataModes::e_std]] = stdArr;

	JSVar fullArr(eArray);
	fill(fullArr, m_vecSortedIndice);
	pobj[TDataModes::dimLits[TDataModes::e_full]] = fullArr;

	JSVar sdArr(eArray);
	fill(sdArr, m_vecSDIndice);
	pobj[STRUCTSIG] = sdArr;
}

// Maps domain site features onto the query and appends the sufficiently
// complete ones to dst.
//
// For every representative, non-multi hit in rRepDomAligns (m_iRepClass == 0
// and m_bRep set), each qualified feature alignment (m_vecQualifiedFeatIndice)
// that lies in the same region and whose domain belongs to the same cluster is
// examined. Every site in the selected feature list is mapped through that
// alignment; a mapped site is kept when TDomSite::EvaluateCompleteness()
// reports at least 0.8.
//
// dst          - receives the accepted feature hits (appended, never cleared)
// rAlignments  - alignment store indexed by m_vecQualifiedFeatIndice
// rRepDomAligns- representative domain hits that drive the search
// rDomInfo     - lookup used to resolve PSSM ids to domain records
// seqLen, dimAaData - forwarded unchanged to MapCdFeature()
void TDomSeqAlignIndex::ComputeMappedFeats(vector<TDomSeqAlignIndex::__TComputedFeatHit> & dst, const vector<TDomSeqAlignment> &rAlignments, const vector<TDomSeqAlignIndex::__TComputedDomHit> &rRepDomAligns, const TDomClusterIndexIfx & rDomInfo, size_t seqLen, const string dimAaData[]) const
{
    for (const auto & v : rRepDomAligns)
    {
        if (0 == v._align->m_iRepClass && v._align->m_bRep)	//non-multi
		{
            // -- search for qualified alignments
            for (size_t falign_idx : m_vecQualifiedFeatIndice)
            {
                const TDomSeqAlignment * falign = &rAlignments[falign_idx];
                if (falign->m_iRegionIdx == v._align->m_iRegionIdx)
                {
                    // -- reuse the already-resolved domain when the feature alignment is the representative itself
                    const TDomain * fdom = (falign == v._align ? v._align_dom : rDomInfo.FindCdInfo(falign->m_uiPssmId));
                    if (nullptr != fdom && fdom->m_iClusterId == v._align_dom->m_iClusterId)
                    {
                        const TDomain *pSrcCdInfo = fdom;

                        // -- default: the domain's own specific features
                        const list<TDomSite> *pFeatList = &(fdom->m_lstSpecificFeatures);

                        bool is_spec = falign->IsSpecific();


                        // -- non-specific hit inside a hierarchy: switch to the generic
                        // -- feature list and report the hierarchy root as the source
                        if (!is_spec && fdom->m_uiHierarchyRoot > 0)
                        {
                            const TDomain * frootdom = rDomInfo.FindCdInfo(fdom->m_uiHierarchyRoot);
                            if (nullptr != frootdom)
                            {
                                // NOTE(review): the list is taken from fdom while the source is
                                // set to frootdom - confirm the generic list is meant to come
                                // from fdom and not from the root.
                                pFeatList = &(fdom->m_lstGenericFeatures);
                                pSrcCdInfo = frootdom;
                            }
                        }

                        // -- feat_idx is the ordinal of the site within pFeatList; it must advance
                        // -- for every site, including skipped ones (hence the label below)
                        int feat_idx = 0;
                        for (const TDomSite & aFeat : *pFeatList)
                        {
                            TDomSeqAlignIndex::__TComputedFeatHit fhit(&aFeat, is_spec, fdom, feat_idx, pSrcCdInfo);
                            if (TDomSite::eType_StructMotif != aFeat.m_iType)	//normal sites
                            {
                                // -- normal sites go through MapCdFeature, which fills fhit
                                MapCdFeature(aFeat, *falign, seqLen, dimAaData, fhit);

                            }
                            else if (is_spec)	//is motif
                            {
                                // -- we do not need motif check
                                // -- copy the site's segments into the hit, then map them through the alignment
                                fhit.CSegSet::operator = (aFeat);
                                falign->MapSegSet(fhit);
                            }
                            else
                                goto labelSkip;	//structural motif on a non-specific hit: ignored

                            // -- keep only hits that are at least 80% complete
                            if (aFeat.EvaluateCompleteness(fhit) >= 0.8)
                            {
                                dst.emplace_back(move(fhit));
                            }
                        labelSkip:
                            ++feat_idx;
                        }
                    }
                }
            }

		}
    }

}


// Writes the five index vectors to dc with CachePODData, in the order:
// sorted, concise, standard, qualified-feature, SD.
// RestoreFromCache() reads them back in this same order, so the two
// functions must be changed together.
void TDomSeqAlignIndex::SaveToCache(CDataCache &dc) const
{
    CachePODData(dc, m_vecSortedIndice);
    CachePODData(dc, m_vecConciseIndice);
    CachePODData(dc, m_vecStdIndice);
    CachePODData(dc, m_vecQualifiedFeatIndice);
    CachePODData(dc, m_vecSDIndice);
}

// Reads the five index vectors from dc with RestorePODData, in the order:
// sorted, concise, standard, qualified-feature, SD.
// This is the same order in which SaveToCache() writes them.
void TDomSeqAlignIndex::RestoreFromCache(CDataCache &dc)
{
    RestorePODData(dc, m_vecSortedIndice);
    RestorePODData(dc, m_vecConciseIndice);
    RestorePODData(dc, m_vecStdIndice);
    RestorePODData(dc, m_vecQualifiedFeatIndice);
    RestorePODData(dc, m_vecSDIndice);
}


// Returns a JSON array holding the alignment indices that reading-frame slot
// rfidx exposes for the given display mode.
JSVar TDomAnnot::CreateIndexArray(int rfidx, int mode) const
{
    JSVar indexArr(eArray);

    for (auto alignIdx : m_dimSplitAligns[rfidx].x_GetModeIndex(mode))
    {
        indexArr.push(JSVar(alignIdx));
    }

    return indexArr;
}

// Returns a JSON array with one entry per alignment selected by reading-frame
// slot rfidx and display mode; each entry is the alignment's CreateJson()
// object with its segments added by AddSegs().
JSVar TDomAnnot::CreateAlignArray(int rfidx, int mode) const
{
    JSVar alignArr(eArray);
    const vector<size_t> & selected = m_dimSplitAligns[rfidx].x_GetModeIndex(mode);

    for (vector<size_t>::const_iterator it = selected.begin(); it != selected.end(); ++it)
    {
        const auto & align = m_vecAlignments[*it];
        JSVar entry = align.CreateJson();
        align.AddSegs(entry);
        alignArr.push(entry);
    }
    return alignArr;
}

// Returns a JSON array with the CreateJson() object of every stored alignment,
// in storage order (segments are not added here).
JSVar TDomAnnot::CreateAllAlignArray(void) const
{
    JSVar alignArr(eArray);
    for (auto it = m_vecAlignments.begin(); it != m_vecAlignments.end(); ++it)
    {
        alignArr.push(it->CreateJson());
    }
    return alignArr;
}

// Writes, in this order: m_iDataSrc, the alignment vector (CacheArray), and
// then each of the READINGFRAME::TOTAL_RFS per-frame alignment indexes.
// RestoreFromCache() reads the same items in the same order.
void TDomAnnot::SaveToCache(CDataCache &dc) const
{
    dc << m_iDataSrc;
    CacheArray(dc, m_vecAlignments);
    for (size_t i = 0; i < READINGFRAME::TOTAL_RFS; ++i)
		m_dimSplitAligns[i].SaveToCache(dc);
}

// Reads, in this order: m_iDataSrc, the alignment vector (RestoreArray), and
// then each of the READINGFRAME::TOTAL_RFS per-frame alignment indexes.
// This mirrors the write order used by SaveToCache().
void TDomAnnot::RestoreFromCache(CDataCache &dc)
{
    dc >> m_iDataSrc;
    RestoreArray(dc, m_vecAlignments);
    for (size_t i = 0; i < READINGFRAME::TOTAL_RFS; ++i)
		m_dimSplitAligns[i].RestoreFromCache(dc);
}

// Writes the TSequence part first, then the TDomAnnot part.
// RestoreFromCache() reads the two parts in the same order.
void TDomQuery::SaveToCache(CDataCache &dc) const
{
    TSequence::SaveToCache(dc);
    TDomAnnot::SaveToCache(dc);
}

// Reads the TSequence part first, then the TDomAnnot part - the same order
// in which SaveToCache() writes them.
void TDomQuery::RestoreFromCache(CDataCache &dc)
{
    TSequence::RestoreFromCache(dc);
    TDomAnnot::RestoreFromCache(dc);
}


// Stores the SNP title (see the value-returning overload) in rDest,
// replacing its previous content.
void TSnpData::ConstructTitle(string& rDest) const
{
	rDest = ConstructTitle();
}

// Builds the SNP title: strType + strId + ":" + original residue +
// display position (iMstPos + RESIDUE_DISPLAY_OFFSET) + mutated residue.
string TSnpData::ConstructTitle(void) const
{
	// -- ':' + residue + up to 11 characters for an int + residue + NUL needs
	// -- 15 bytes; the former 12-byte buffer overflowed for positions with 9+
	// -- digits (or negative values). snprintf additionally bounds the write.
	char dimBuf[32];
	snprintf(dimBuf, sizeof(dimBuf), ":%c%d%c", cOriRes, iMstPos + RESIDUE_DISPLAY_OFFSET, cMutRes);
	return strType + strId + dimBuf;
}

// The JSON form of a SNP is just its title string.
JSVar TSnpData::CreateJson(void) const
{
	string title;
	ConstructTitle(title);
	return JSVar(title);
}

// Streams all SNP fields into dc. The field order (iSnpId, iSnpTitle, iMstPos,
// iNbrPos, strType, strId, cOriRes, cMutRes) is the same one
// RestoreFromCache() reads.
void TSnpData::SaveToCache(CDataCache &dc) const
{
    dc << iSnpId << iSnpTitle << iMstPos << iNbrPos << strType << strId
        << cOriRes << cMutRes;
}

// Reads all SNP fields from dc in the order SaveToCache() writes them:
// iSnpId, iSnpTitle, iMstPos, iNbrPos, strType, strId, cOriRes, cMutRes.
void TSnpData::RestoreFromCache(CDataCache &dc)
{
    dc >> iSnpId >> iSnpTitle >> iMstPos >> iNbrPos >> strType >> strId
        >> cOriRes >> cMutRes;
}

// Streams the BLAST parameters into dc: database path, database name, e-value
// cut-off, low-complexity filter flag, composition-based adjustment flag and
// maximum hit count. RestoreFromCache() reads them in this same order.
void TBlastParams::SaveToCache(CDataCache &dc) const
{
    dc << m_strDbPath << m_strDbName << m_dEValCutOff << m_bLCFilter << m_bCompBasedAdj << m_nMaxHits;
}

// Reads the BLAST parameters from dc in the order SaveToCache() writes them:
// database path, database name, e-value cut-off, low-complexity filter flag,
// composition-based adjustment flag, maximum hit count.
void TBlastParams::RestoreFromCache(CDataCache &dc)
{
    dc >> m_strDbPath >> m_strDbName >> m_dEValCutOff >> m_bLCFilter >> m_bCompBasedAdj >> m_nMaxHits;
}

// Streams the process-info scalar fields into dc, then writes the status
// message vector with CachePODData. RestoreFromCache() reads the same items
// in the same order.
void TBlastProcessInfo::SaveToCache(CDataCache &dc) const
{
    dc << m_strDartServer << m_strDartDb << m_strDartVer << m_strBlastProgram
        << m_strBlastService << m_strSearchCreator << m_blStatus
        << m_bProcessed;
    CachePODData(dc, m_vecStatusMsgs);
}

// Reads the process-info scalar fields from dc, then the status message
// vector with RestorePODData - the same order SaveToCache() writes them.
void TBlastProcessInfo::RestoreFromCache(CDataCache &dc)
{
    dc >> m_strDartServer >> m_strDartDb >> m_strDartVer >> m_strBlastProgram
        >> m_strBlastService >> m_strSearchCreator >> m_blStatus
        >> m_bProcessed;
    RestorePODData(dc, m_vecStatusMsgs);
}



// Move constructor: takes over the domain and cluster stores of `other`.
// The base sub-objects are default-constructed and the lookup indices are
// rebuilt from the moved-in stores by RefreshIndices() rather than moved.
// NOTE(review): the lookup indices of `other` are not touched here - confirm
// that a moved-from map is never queried.
TFlatDomClusterMap::TFlatDomClusterMap(TFlatDomClusterMap && other):
    Cacheable(), CDomClusterIndex(), m_cdstore(move(other.m_cdstore)), m_clstore(move(other.m_clstore))
{
    RefreshIndices();
}


// Move assignment: takes over the domain and cluster stores of `other` and
// rebuilds the lookup indices from them. Assigning an object to itself is a
// no-op, so the stores are never moved onto themselves.
TFlatDomClusterMap & TFlatDomClusterMap::operator = (TFlatDomClusterMap && other)
{
    if (this != &other)
    {
        m_cdstore = move(other.m_cdstore);
        m_clstore = move(other.m_clstore);
        // -- indices point into the stores, so they must be rebuilt after the move
        RefreshIndices();
    }
    return *this;
}


// Constructs the map from externally held domains and clusters. The stores
// start empty and x_CopyDomsClusters() appends a copy of every pointed-to
// record and builds the lookup indices.
TFlatDomClusterMap::TFlatDomClusterMap(const vector< const TDomain * > & doms, const vector< const TCluster * > & clsts):
    Cacheable(), CDomClusterIndex(), m_cdstore(), m_clstore()
{
    x_CopyDomsClusters(doms, clsts);
}


// Discards the current content and refills the map from the given domains and
// clusters: both stores are reset, then x_CopyDomsClusters() copies the new
// records in and resets the lookup indices.
void TFlatDomClusterMap::Replace(const vector< const TDomain * > & doms, const vector< const TCluster * > & clsts)
{
    m_cdstore.Reset();
    m_clstore.Reset();
    x_CopyDomsClusters(doms, clsts);
}



void TFlatDomClusterMap::RefreshIndices(void)
{
	vector<CPartDomainStore::DATA_PTR> domptrs = m_cdstore.GetPointers();
	m_pssmid2cd.Reset(domptrs);
	m_acxn2cd.Reset(move(domptrs));

	vector<CPartClusterStore::DATA_PTR> clptrs = m_clstore.GetPointers();
	m_pssmid2fam.Reset(clptrs);
	m_clid2fam.Reset(move(clptrs));
}


// Replaces both stores by move-assigning the given ones, then rebuilds the
// lookup indices from the new content.
void TFlatDomClusterMap::MoveIn(CPartDomainStore &&doms, CPartClusterStore && cls)
{
	m_cdstore = move(doms);
	m_clstore = move(cls);

	RefreshIndices();
}


// Returns a JSON object with one entry per stored cluster, keyed by the
// decimal cluster id; the value is the cluster's CreateJson() object.
LJSON::JSVar TFlatDomClusterMap::CreateClusterCollection(void) const
{
    LJSON::JSVar collection(LJSON::eObject);

    vector<CPartClusterStore::CONST_DATA_PTR> clusters;
    m_clstore.CreatePtrVector(clusters);

    for (size_t i = 0; i < clusters.size(); ++i)
    {
        auto pClst = clusters[i];
        collection[to_string(pClst->m_iClusterId)] = pClst->CreateJson();
    }

    return collection;
}


// Returns a JSON object with one entry per stored domain, keyed by the
// decimal PSSM id; the value is the domain's CreateJson() object.
LJSON::JSVar TFlatDomClusterMap::CreateDomainCollection(void) const
{
    LJSON::JSVar collection(LJSON::eObject);

    vector<CPartDomainStore::CONST_DATA_PTR> domains;
    m_cdstore.CreatePtrVector(domains);

    for (size_t i = 0; i < domains.size(); ++i)
    {
        auto pDom = domains[i];
        collection[to_string(pDom->m_uiPssmId)] = pDom->CreateJson();
    }

    return collection;
}

// Returns a JSON object with one entry per stored cluster, keyed by the
// cluster accession ("cl" followed by the id zero-padded to five digits);
// the value is the cluster's CreateJson() object.
LJSON::JSVar TFlatDomClusterMap::CreateClusterCollection_acxn(void) const
{
    LJSON::JSVar collection(LJSON::eObject);

    vector<CPartClusterStore::CONST_DATA_PTR> clusters;
    m_clstore.CreatePtrVector(clusters);

    char acxnBuf[32];
    for (size_t i = 0; i < clusters.size(); ++i)
    {
        auto pClst = clusters[i];
        sprintf(acxnBuf, "cl%05d", pClst->m_iClusterId);
        collection[acxnBuf] = pClst->CreateJson();
    }

    return collection;
}


// Returns a JSON object with one entry per stored domain, keyed by the
// domain accession; include_feats is forwarded to each domain's CreateJson().
LJSON::JSVar TFlatDomClusterMap::CreateDomainCollection_acxn(bool include_feats) const
{
    LJSON::JSVar collection(LJSON::eObject);

    vector<CPartDomainStore::CONST_DATA_PTR> domains;
    m_cdstore.CreatePtrVector(domains);

    for (size_t i = 0; i < domains.size(); ++i)
    {
        auto pDom = domains[i];
        collection[pDom->m_strAccession] = pDom->CreateJson(include_feats);
    }

    return collection;
}


// Writes the domain store, then the cluster store, with CacheCompactStore.
// The lookup indices are not written; RestoreFromCache() rebuilds them.
void TFlatDomClusterMap::SaveToCache(CDataCache &dc) const
{
	CacheCompactStore(dc, m_cdstore);
    CacheCompactStore(dc, m_clstore);
}



// Reads the domain store, then the cluster store (the order SaveToCache()
// writes them), and rebuilds the lookup indices from the restored records.
void TFlatDomClusterMap::RestoreFromCache(CDataCache &dc)
{
	RestoreCompactStore(dc, m_cdstore);
    RestoreCompactStore(dc, m_clstore);
    RefreshIndices();
}


void TFlatDomClusterMap::x_CopyDomsClusters(const vector< const TDomain * > & doms, const vector< const TCluster * > & clsts)
{
    size_t ttl = doms.size();
    vector<TDomain *> domptrs;
    domptrs.reserve(ttl);
    
    for (auto pdom : doms)
        domptrs.push_back(m_cdstore.Append(*pdom));
    
    m_pssmid2cd.Reset(domptrs);
	m_acxn2cd.Reset(move(domptrs));
    
    
    ttl = clsts.size();
    vector<TCluster *> clptrs;
    clptrs.reserve(ttl);
    
    for (auto pclst : clsts)
        clptrs.push_back(m_clstore.Append(*pclst));
    
    m_pssmid2fam.Reset(clptrs);
	m_clid2fam.Reset(move(clptrs));
}




// Decodes XML/HTML character entities in dst, in place.
//
//  - numeric references ("&#65;", "&#x41;") become the character when the code
//    is printable ASCII (0x20..0x7E), otherwise '_'
//  - the named entities amp, apos, lt, gt and quot become their character
//  - any other "&...;" span shorter than 9 characters becomes '_'; longer
//    spans are assumed not to be entities and are left untouched
//
// Scanning resumes right after each '&' that was handled, so decoded text is
// never decoded a second time ("&amp;lt;" yields "&lt;").
void ReplaceEntities(string &dst)
{
	if (dst.size() < 4) return;	//shortest possible entity, e.g. "&lt;"

	size_t idx0 = dst.find('&');

	while (string::npos != idx0)
	{
		const size_t idx1 = dst.find(';', idx0);
		if (string::npos == idx1)
			break;

		const size_t spanLen = idx1 - idx0 + 1;	//'&' through ';' inclusive

		// -- idx1 > idx0, so idx0 + 1 is always a valid index
		if ('#' == dst[idx0 + 1])	//hash number
		{
			// -- stays 0 (-> '_') when the digits are missing or malformed
			unsigned int charCode = 0;
			if ('x' == dst[idx0 + 2] || 'X' == dst[idx0 + 2])
				sscanf(dst.substr(idx0 + 3, idx1 - idx0 - 3).c_str(), "%x", &charCode);
			else
				sscanf(dst.substr(idx0 + 2, idx1 - idx0 - 2).c_str(), "%u", &charCode);

			if (charCode >= 0x20 && charCode < 127)
				dst.replace(idx0, spanLen, 1, char(charCode));
			else
				dst.replace(idx0, spanLen, 1, '_');
		}
		else
		{
			const string entname = dst.substr(idx0 + 1, idx1 - idx0 - 1);
			if (entname == "amp")
				dst.replace(idx0, spanLen, 1, '&');
			else if (entname == "apos")
				dst.replace(idx0, spanLen, 1, '\'');
			else if (entname == "lt")
				dst.replace(idx0, spanLen, 1, '<');
			else if (entname == "gt")
				dst.replace(idx0, spanLen, 1, '>');
			else if (entname == "quot")
				dst.replace(idx0, spanLen, 1, '"');
			else if (idx1 - idx0 < 9)	//unknown entity, use _
				dst.replace(idx0, spanLen, 1, '_');
		}

		// -- next
		idx0 = dst.find('&', idx0 + 1);
	}
}

// Extracts a short definition line from a domain description.
//
// The result starts after an optional short-name prefix (a ':' within the
// first 11 characters) and any following non-alphanumeric characters, and ends
// just before the first qualified delimiter. Delimiters are tried in this
// order: every ';' that does not terminate a character entity ("&name;" or
// "&#123;"), then every '.'. A delimiter qualifies only when all parentheses,
// square brackets and curly brackets between the start and the delimiter are
// balanced. When nothing qualifies, dst receives the whole of ori (prefix
// included).
//
// ori - full description
// dst - receives the short definition line
void GetShortDomainDefline(const string &ori, string &dst)
{
	const size_t len = ori.size();
	size_t start_pos = 0, last_pos = 0;

	size_t pos = ori.find(':');
	if (string::npos != pos && pos <= 10) 	//get rid of short name prefix
	{
		start_pos = pos + 1;
		last_pos = start_pos;
	}

	// -- must start with alphanumeric; cast because <cctype> functions are
	// -- undefined for negative char values
	while (start_pos < len && !isalnum(static_cast<unsigned char>(ori[start_pos])))
		++start_pos;

	vector<size_t> dps;	//delimit positions
	dps.reserve(16);	//estimation

	// -- pass 1: semicolons, skipping those that close an entity
	for (pos = ori.find(';', last_pos); string::npos != pos; pos = ori.find(';', last_pos))
	{
		// -- walk backwards from the ';' (not past the previous one): an entity is
		// -- alphanumerics preceded by '&', or by "&#"
		bool is_entity = false, hash_found = false;
		for (size_t i = pos; i > last_pos; --i)
		{
			const char tc = ori[i - 1];
			if (hash_found)
			{
				is_entity = ('&' == tc);	//'#' must directly follow '&'
				break;
			}
			if ('#' == tc)
			{
				hash_found = true;
				continue;
			}
			if ('&' == tc)
			{
				is_entity = true;
				break;
			}
			if (!isalnum(static_cast<unsigned char>(tc)))
				break;
		}

		if (!is_entity)
			dps.push_back(pos);
		last_pos = pos + 1;
	}

	// -- pass 2: periods
	for (pos = ori.find('.', start_pos); string::npos != pos; pos = ori.find('.', pos + 1))
		dps.push_back(pos);

	// -- evaluate the candidates in the order collected
	size_t p = 0, s = 0, c = 0;	//counter for parenthes, square bracket, curly bracket
	size_t charIdx = start_pos;
	for (size_t posIdx = 0, posTtl = dps.size(); posIdx < posTtl; ++posIdx)
	{
		pos = dps[posIdx];

		// -- the list is "all ';' then all '.'", so a candidate may lie before the
		// -- text already counted; recount from the start instead of judging it
		// -- with the bracket depth of a later position
		if (pos < charIdx)
		{
			p = s = c = 0;
			charIdx = start_pos;
		}

		for (; charIdx < pos; ++charIdx)
		{
			switch (ori[charIdx])
			{
			case '(':
				++p;
				break;
			case '[':
				++s;
				break;
			case '{':
				++c;
				break;
			case ')':
				--p;
				break;
			case ']':
				--s;
				break;
			case '}':
				--c;
				break;
			}
		}

		if (0 == p && 0 == s && 0 == c)	//good
		{
			dst = ori.substr(start_pos, pos - start_pos);
			return;
		}
	}

	// -- no qualified delimit found. get the whole thing
	dst = ori;
}

// Shortens a definition line to roughly uiCutOff characters and marks the cut
// with an ellipsis, preferring to cut at a word boundary.
//
//  - text that already fits is returned unchanged
//  - a cut-off of 3 or less yields just "..."
//  - otherwise the nearest word boundary is looked for on both sides of the
//    cut-off: to the right within k_ulRightSoftMargin characters past it, and
//    to the left back to the start. The right boundary wins when it is closer.
//    A word is a run of alphanumerics, '_' and '-'.
//  - with no boundary at all the text is cut hard at uiCutOff
//
// The returned string can therefore be a few characters longer than uiCutOff.
string TruncateDefline(const string& strFullDefline, size_t uiCutOff)
{
	static const size_t k_ulRightSoftMargin = 6;
	const size_t ulTotalChars = strFullDefline.size();

	if (ulTotalChars <= uiCutOff) return strFullDefline;
	if (uiCutOff <= 3)
		return "...";

	// -- cast: <cctype> functions are undefined for negative char values
	auto isWordChar = [](char ch) -> bool
	{
		return 0 != isalnum(static_cast<unsigned char>(ch)) || '_' == ch || '-' == ch;
	};

	// -- right side: first non-word character at or after the cut-off. The margin
	// -- is measured from the cut-off (it used to be compared with the absolute
	// -- index, which disabled this search for every cut-off above 6).
	size_t idxRight = uiCutOff;
	for (; idxRight < ulTotalChars; ++idxRight)
	{
		if (idxRight - uiCutOff > k_ulRightSoftMargin)
		{
			idxRight = 0;	//fail
			break;
		}
		if (!isWordChar(strFullDefline[idxRight]))
			break;
	}
	if (idxRight >= ulTotalChars) idxRight = 0;	//ran off the end: no boundary

	// -- left side: first non-word character before the cut-off (0 = none)
	size_t idxLeft = uiCutOff - 1;
	while (idxLeft > 0 && isWordChar(strFullDefline[idxLeft]))
		--idxLeft;

	// -- take whichever boundary is closer to the cut-off
	if (idxRight > 0 && idxRight - uiCutOff < uiCutOff - idxLeft) idxLeft = idxRight;
	if (0 == idxLeft)	//no breakpoint, use hard truncate
	{
		return strFullDefline.substr(0, uiCutOff) + "...";
	}

	switch (strFullDefline[idxLeft])
	{
		case '.':
		case ',':
		case ')':
		case ']':
		case '}':
		case '!':
		case ';':
		case ':':
		case '?':
			// -- closing punctuation stays, followed by a space
			return strFullDefline.substr(0, idxLeft + 1) + " ...";
		case ' ':
			// -- the space itself serves as the separator
			return strFullDefline.substr(0, idxLeft + 1) + "...";
		default:
			// -- any other boundary character is dropped
			return strFullDefline.substr(0, idxLeft) + " ...";
	}
}


// Builds a sequence id string of the form <k_lpszEaaMD5>_<MD5 of the sequence>.
string CreateMD5SeqIdStr(const string &seqdata)
{
	string idStr(k_lpszEaaMD5);
	idStr += "_";
	idStr += CalcSeqMD5(seqdata);
	return idStr;
}


// Turns a bare sequence into a FASTA record in place by prepending a
// ">lcl|<md5 id>" header line.
void FastaAddLocalId(string& rBareSeq)
{
	string fasta(">lcl|");
	fasta += CreateMD5SeqIdStr(rBareSeq);
	fasta += "\n";
	fasta += rBareSeq;
	rBareSeq.swap(fasta);
}

// Returns the FASTA record for a bare sequence: a ">lcl|<md5 id>" header line
// followed by the sequence. The argument is left untouched.
string FastaAddLocalId(const string& rBareSeq)
{
	return string(">lcl|") + CreateMD5SeqIdStr(rBareSeq) + "\n" + rBareSeq;
}

// Splits a user supplied FASTA definition line into an id part and a title.
//
// Leading '>' and characters up to and including the space are skipped, the
// rest is split on '|'. Fields are consumed in (type, value) pairs which are
// appended to rIdStr joined with '|'; when rIdStr already has content the new
// part is wrapped in parentheses. If an odd number of fields remains, the last
// one is stored in rDefl; otherwise rDefl is left untouched.
void ParseUserDefl(const string& rUsrDefl, string& rIdStr, string& rDefl)
{
	const char kDelim = '|';
	const size_t total = rUsrDefl.size();

	// -- skip the marker / leading blanks
	size_t cursor = 0;
	while (cursor < total && ('>' == rUsrDefl[cursor] || rUsrDefl[cursor] <= 32))
		++cursor;
	if (cursor >= total)
		return;	//nothing to parse

	// -- split on the delimiter; an empty field after a trailing '|' is not produced
	vector<string> fields;
	do
	{
		size_t stop = rUsrDefl.find(kDelim, cursor);
		if (string::npos == stop)
			stop = total;
		fields.emplace_back(rUsrDefl.substr(cursor, stop - cursor));
		cursor = stop + 1;
	} while (cursor < total);

	const size_t pairCount = fields.size() / 2;
	if (pairCount > 0)
	{
		const bool wrap = !rIdStr.empty();
		if (wrap)
			rIdStr.push_back('(');

		for (size_t i = 0; i < pairCount; ++i)
		{
			if (i > 0)
				rIdStr.push_back(kDelim);
			rIdStr += fields[2 * i];
			rIdStr.push_back(kDelim);
			rIdStr += fields[2 * i + 1];
		}

		if (wrap)
			rIdStr.push_back(')');
	}

	// -- odd number of fields: the last one is the title
	if (fields.size() > 2 * pairCount)
		rDefl = fields[2 * pairCount];
}


// Returns MD5Digest() of the normalized sequence text: letters are
// upper-cased, the characters ( ) = . , / are kept as they are, and everything
// else (whitespace, digits, other symbols) is dropped before hashing.
string CalcSeqMD5(const string &seqdata)
{
	string cleaned;
	cleaned.reserve(seqdata.size());

	for (string::const_iterator i = seqdata.begin(), ie = seqdata.end(); ie != i; ++i)
	{
		// -- <cctype> functions are undefined for negative values, so go through
		// -- unsigned char
		const unsigned char uch = static_cast<unsigned char>(*i);
		if (isalpha(uch))
			cleaned.push_back(static_cast<char>(toupper(uch)));
		else
			switch (*i)
			{
			case '(':
			case ')':
			case '=':
			case '.':
			case ',':
			case '/':
				cleaned.push_back(*i);
				break;
			default:;	//skip
			}
	}

	return MD5Digest(cleaned);
}

// Returns the reverse complement of a nucleotide sequence, in upper case.
//
// The input is read back to front and each base is replaced by its
// complement; IUPAC ambiguity codes are supported and U is treated like T.
// Characters that are not nucleotide codes are skipped, so the result can be
// shorter than the input.
string GetComplementSeq(const string &rSeq)
{
	string result;
	result.reserve(rSeq.size());
	for (string::const_reverse_iterator riter = rSeq.rbegin(), riterEnd = rSeq.rend(); riter != riterEnd; ++riter)
	{
		char comp = 0;
		// -- toupper is undefined for negative values, so go through unsigned char
		switch (toupper(static_cast<unsigned char>(*riter)))
		{
		case 'A':
			comp = 'T';
			break;
		case 'C':
			comp = 'G';
			break;
		case 'G':
			comp = 'C';
			break;
		case 'T':
		case 'U':
			comp = 'A';
			break;
		case 'R':	//A/G, purine
			comp = 'Y';
			break;
		case 'Y':	//C/T/U: pyrimidine
			comp = 'R';
			break;
		case 'M':	//A or C
			comp = 'K';
			break;
		case 'K':	//G or T
			comp = 'M';
			break;
		case 'S':	//Strong, C or G
			comp = 'W';
			break;
		case 'W':	//Weak, A or T
			comp = 'S';
			break;
		case 'H':	//not G
			comp = 'D';
			break;
		case 'D':	//not C
			comp = 'H';
			break;
		case 'B':	//Not A
			comp = 'V';
			break;
		case 'V':	//Not T or U
			comp = 'B';
			break;
		case 'N':	//anything
			comp = 'N';
			break;
		default:	//not a nucleotide code: skip
			continue;
		}
		result.push_back(comp);
	}
	return result;
}


// Removes a leading copy of pfx from txt.
//
//  - either string empty, or the two differ within their common length:
//    txt is returned unchanged
//  - txt is no longer than pfx and matches its beginning: empty string
//  - txt starts with pfx: the prefix is removed only when it is directly
//    followed by one of , . : ; - that character and any following run of
//    blanks/control characters and , . : ; are removed too. Otherwise txt is
//    returned unchanged.
string RemoveDupPrefix(const string &pfx, const string &txt)
{
	const size_t pfxlen = pfx.size(), txtlen = txt.size();
	if (0 == pfxlen || 0 == txtlen) return txt;

	// -- the two strings must agree over their common length
	const size_t common = (pfxlen < txtlen ? pfxlen : txtlen);
	for (size_t k = 0; k < common; ++k)
	{
		if (pfx[k] != txt[k])
			return txt;
	}

	// -- the text is entirely contained in the prefix
	if (pfxlen >= txtlen)
		return k_strEmptyString;

	// -- the prefix must end at a punctuation mark to count as a duplicate
	const char follow = txt[pfxlen];
	if (',' != follow && '.' != follow && ':' != follow && ';' != follow)
		return txt;

	// -- swallow the separator run after the prefix
	size_t idx = pfxlen + 1;
	while (idx < txtlen && (txt[idx] <= 32 || '.' == txt[idx] || ',' == txt[idx] || ';' == txt[idx] || ':' == txt[idx]))
		++idx;

	if (idx >= txtlen)
		return k_strEmptyString;
	return txt.substr(idx, txtlen - idx);
}

// Tells whether gc is an accepted genetic code id: 1-6, 9-14, 16 or 21-25.
bool IsValidGeneticCode(int gc)
{
	if (gc < 1 || gc > 25)
		return false;
	if (gc <= 6)
		return true;
	if (gc < 9)
		return false;
	if (gc <= 14)
		return true;
	// -- 15..25 remain: only 16 and 21..25 are accepted
	return 16 == gc || gc >= 21;
}

// Extracts a short sequence title from a definition line.
//
// The title is the part of defl that
//  - ends before a trailing "[organism]" tag (and the blanks in front of it);
//    a dangling ']' without '[' is dropped, and a tag that starts at the very
//    beginning of the line is not stripped
//  - starts after the first '=' when there is one inside that range
//  - ends at the first '.' not followed by a digit, at the first ';', or at the
//    first '-' that is followed by a blank (blanks before that '-' are
//    trimmed), whichever limits the range - each is looked for once, from the
//    start of the title
string GetSeqTitleFromDefline(const string &defl)
{
    if (defl.empty())
        return string();

    // -- blanks and control characters; cast so that bytes >= 0x80 do not count
    auto isBlank = [](char ch) -> bool { return static_cast<unsigned char>(ch) <= 0x20; };

    size_t l = 0, r = defl.size();

    // -- first check tailing organism name
    if (']' == defl[r - 1])
    {
        const size_t open = defl.rfind('[');
        if (string::npos == open)  //no match
            --r;    //skip dangling ']'
        else if (open > 0)
        {
            // -- cut at the bracket and trim the blanks before it; subtracting one
            // -- blindly lost a character when no blank preceded '[' and wrapped
            // -- around when '[' was the first character
            r = open;
            while (r > 0 && isBlank(defl[r - 1]))
                --r;
        }
    }

    size_t m = defl.find('=');
    if (string::npos != m && m < r)
        l = m + 1;

    // -- defl[m + 1] is the terminating null character when m is the last index
    m = defl.find('.', l);
    if (string::npos != m && m < r && !isdigit(static_cast<unsigned char>(defl[m + 1])))
        r = m;

    m = defl.find(';', l);
    if (string::npos != m && m < r)
        r = m;

    m = defl.find('-', l);
    if (string::npos != m && m < r && isBlank(defl[m + 1]))
    {
        while (m > 0 && isBlank(defl[m - 1]))
            --m;
        r = m;
    }

    return defl.substr(l, r - l);
}



