sot-talos-balance  1.7.0
Markup.cpp
Go to the documentation of this file.
1 #define _CRT_SECURE_NO_WARNINGS
2 #define NOMINMAX
3 
4 // Markup.cpp: implementation of the NBC_CMarkup class.
5 //
6 // NBC_CMarkup Release 6.5 Lite
7 // Copyright (C) 1999-2003 First Objective Software, Inc. All rights reserved
8 // This entire notice must be retained in this source code
9 // Redistributing this source code requires written permission
10 // This software is provided "as is", with no warranty.
11 // Latest fixes enhancements and documentation at www.firstobject.com
12 
13 #include <assert.h>
14 #include <algorithm>
15 #include <stdarg.h>
16 #include <string.h>
17 
19 
20 #ifdef _DEBUG
21 #undef THIS_FILE
22 static char THIS_FILE[]=__FILE__;
23 #define new DEBUG_NEW
24 #endif
25 
26 void CMarkup::SetIndent(int nIndent)
27 {
28  mnIndent = nIndent;
29 }
30 
// Assignment: copies the navigation indexes, the node type, the element
// position array and the document text from another CMarkup.
// NOTE(review): in the lines visible here mnIndent, mtIndent and m_csError
// are not copied, and the operator returns void so assignments cannot be
// chained - confirm both are intended.
31 void CMarkup::operator=(const CMarkup& markup)
32 {
 // Current parent / main / child positions and the next free slot index
33  m_iPosParent = markup.m_iPosParent;
34  m_iPos = markup.m_iPos;
35  m_iPosChild = markup.m_iPosChild;
36  m_iPosFree = markup.m_iPosFree;
37  m_nNodeType = markup.m_nNodeType;
 // The clear() is redundant with the vector assignment that follows
38  m_aPos.clear();
39  m_aPos = markup.m_aPos;
40  m_csDoc = markup.m_csDoc;
42 }
43 
44 bool CMarkup::SetDoc(const char* szDoc)
45 {
46  // Reset indexes
47  m_iPosFree = 1;
48  ResetPos();
49 
50  // Set document text
51  if (szDoc)
52  m_csDoc = szDoc;
53  else
54  m_csDoc.erase();
55 
56  // Starting size of position array: 1 element per 64 bytes of document
57  // Tight fit when parsing small doc, only 0 to 2 reallocs when parsing large doc
58  // Start at 8 when creating new document
59  std::string::size_type nStartSize = m_csDoc.length() / 64 + 8;
60  if (m_aPos.size() < nStartSize)
61  m_aPos.resize(nStartSize);
62 
63  // Parse document
64  bool bWellFormed = false;
65  if (m_csDoc.length())
66  {
67  m_aPos[0].Clear();
68  int iPos = x_ParseElem(0);
69  if (iPos > 0)
70  {
71  m_aPos[0].iElemChild = iPos;
72  bWellFormed = true;
73  }
74  }
75 
76  // Clear indexes if parse failed or empty document
77  if (! bWellFormed)
78  {
79  m_aPos[0].Clear();
80  m_iPosFree = 1;
81  }
82 
83  ResetPos();
84 
85  memset(mtIndent, ' ', sizeof(mtIndent));
86  mtIndent[999] = 0;
87  return bWellFormed;
88 }
89 
91 {
 // NOTE(review): the signature line is absent from this listing; x_AddElem
 // calls IsWellFormed(), which presumably is this function - confirm.
 // Returns true when the position array is non-empty and the virtual root
 // (slot 0) has a child element, i.e. a root element was parsed or added.
92  if (!(m_aPos.empty()) && m_aPos[0].iElemChild)
93  return true;
94  return false;
95 }
96 
97 bool CMarkup::FindElem(const char* szName)
98 {
99  // Change current position only if found
100  //
101  if (!m_aPos.empty())
102  {
103  int iPos = x_FindElem(m_iPosParent, m_iPos, szName);
104  if (iPos)
105  {
106  // Assign new position
107  x_SetPos(m_aPos[iPos].iElemParent, iPos, 0);
108  return true;
109  }
110  }
111  return false;
112 }
113 
114 bool CMarkup::FindChildElem(const char* szName)
115 {
116  // Change current child position only if found
117  //
118  // Shorthand: call this with no current main position
119  // means find child under root element
120  if (! m_iPos)
121  FindElem();
122 
123  int iPosChild = x_FindElem(m_iPos, m_iPosChild, szName);
124  if (iPosChild)
125  {
126  // Assign new position
127  int iPos = m_aPos[iPosChild].iElemParent;
128  x_SetPos(m_aPos[iPos].iElemParent, iPos, iPosChild);
129  return true;
130  }
131 
132  return false;
133 }
134 
135 
136 std::string CMarkup::GetTagName() const
137 {
138  // Return the tag name at the current main position
139  std::string csTagName;
140 
141 
142  if (m_iPos)
143  csTagName = x_GetTagName(m_iPos);
144  return csTagName;
145 }
146 
148 {
 // NOTE(review): the signature line and listing line 159 (the statement
 // inside the if-block below) are absent from this listing; the comment
 // below suggests this is IntoElem - confirm against the real source.
149  // If there is no child position and IntoElem is called it will succeed in release 6.3
150  // (A subsequent call to FindElem will find the first element)
151  // The following short-hand behavior was never part of EDOM and was misleading
152  // It would find a child element if there was no current child element position and go into it
153  // It is removed in release 6.3, this change is NOT backwards compatible!
154  // if (! m_iPosChild)
155  // FindChildElem();
156 
 // Succeeds only when there is a main position and it is an element node
157  if (m_iPos && m_nNodeType == MNT_ELEMENT)
158  {
160  return true;
161  }
162  return false;
163 }
164 
166 {
 // NOTE(review): the signature line (bool OutOfElem()) and listing line 170
 // (the statement inside the if-block) are absent from this listing.
 // Returns true when there is a parent position to move out to,
 // false when already at the top level.
167  // Go to parent element
168  if (m_iPosParent)
169  {
171  return true;
172  }
173  return false;
174 }
175 
177 // Private Methods
179 
181 {
 // NOTE(review): the signature line (int x_GetFreePos()) is absent from
 // this listing.
182  //
183  // This returns the index of the next unused ElemPos in the array
184  //
 // Grow the array by half its size when every slot is in use.
 // NOTE(review): size + size/2 does not grow an array of size 1;
 // SetDoc sizes the array to at least 8, which avoids that case.
185  if (m_iPosFree == (int)m_aPos.size())
186  m_aPos.resize(m_iPosFree + m_iPosFree / 2);
 // Hand out the current free index and advance the free marker
187  ++m_iPosFree;
188  return m_iPosFree - 1;
189 }
190 
192 {
 // NOTE(review): the signature line (int x_ReleasePos()) is absent from
 // this listing.
193  //
194  // This decrements the index of the next unused ElemPos in the array
195  // allowing the element index returned by GetFreePos() to be reused
196  //
197  --m_iPosFree;
 // Always returns 0 so callers can write "return x_ReleasePos();"
 // to report that no element was found
198  return 0;
199 }
200 
201 int CMarkup::x_ParseError(const char* szError, const char* szName)
202 {
203  if (szName)
204  m_csError = Format(szError, szName);
205  else
206  m_csError = szError;
207  x_ReleasePos();
208  return -1;
209 }
210 
// Recursively parse one element (and everything nested in it) starting at
// m_aPos[iPosParent].nEndL.
// Returns the index of the new ElemPos on success, 0 (via x_ReleasePos)
// when an end tag is met instead of a start tag, and -1 (via x_ParseError,
// which also sets m_csError) when the text is not well-formed.
211 int CMarkup::x_ParseElem(int iPosParent)
212 {
213  // This is either called by SetDoc, x_AddSubDoc, or itself recursively
214  // m_aPos[iPosParent].nEndL is where to start parsing for the child element
215  // This returns the new position if a tag is found, otherwise zero
216  // In all cases we need to get a new ElemPos, but release it if unused
217  //
218  int iPos = x_GetFreePos();
219  m_aPos[iPos].nStartL = m_aPos[iPosParent].nEndL;
220  m_aPos[iPos].iElemParent = iPosParent;
221  m_aPos[iPos].iElemChild = 0;
222  m_aPos[iPos].iElemNext = 0;
223 
224  // Start Tag
225  // A loop is used to ignore all remarks tags and special tags
226  // i.e. <?xml version="1.0"?>, and <!-- comment here -->
227  // So any tag beginning with ? or ! is ignored
228  // Loop past ignored tags
229  TokenPos token(m_csDoc.c_str());
230  token.nNext = m_aPos[iPosParent].nEndL;
231  std::string csName;
 // The loop ends once an element name has been read into csName
232  while (csName.empty())
233  {
234  // Look for left angle bracket of start tag
235  m_aPos[iPos].nStartL = token.nNext;
236  if (! x_FindChar(token.szDoc, m_aPos[iPos].nStartL, '<'))
237  return x_ParseError("Element tag not found");
238 
239  // Set parent's End tag to start looking from here (or later)
240  m_aPos[iPosParent].nEndL = m_aPos[iPos].nStartL;
241 
242  // Determine whether this is an element, or bypass other type of node
243  token.nNext = m_aPos[iPos].nStartL + 1;
244  if (x_FindToken(token))
245  {
246  if (token.bIsString)
247  return x_ParseError("Tag starts with quote");
248  char cFirstChar = m_csDoc[token.nL];
249  if (cFirstChar == '?' || cFirstChar == '!')
250  {
 // Rewind to the '<' so x_ParseNode can skip the whole special node
251  token.nNext = m_aPos[iPos].nStartL;
252  if (! x_ParseNode(token))
253  return x_ParseError("Invalid node");
254  }
255  else if (cFirstChar != '/')
256  {
257  csName = x_GetToken(token);
258  // Look for end of tag
259  if (! x_FindChar(token.szDoc, token.nNext, '>'))
260  return x_ParseError("End of tag not found");
261  }
262  else
263  return x_ReleasePos(); // probably end tag of parent
264  }
265  else
266  return x_ParseError("Abrupt end within tag");
267  }
 // token.nNext now indexes the '>' that closes the start tag
268  m_aPos[iPos].nStartR = token.nNext;
269 
270  // Is ending mark within start tag, i.e. empty element?
271  if (m_csDoc[m_aPos[iPos].nStartR-1] == '/')
272  {
273  // Empty element
274  // Close tag left is set to ending mark, and right to open tag right
275  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR-1;
276  m_aPos[iPos].nEndR = m_aPos[iPos].nStartR;
277  }
278  else // look for end tag
279  {
280  // Element probably has contents
281  // Determine where to start looking for left angle bracket of end tag
282  // This is done by recursively parsing the contents of this element
283  int iInner, iInnerPrev = 0;
284  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + 1;
285  while ((iInner = x_ParseElem(iPos)) > 0)
286  {
287  // Set links to iInner
288  if (iInnerPrev)
289  m_aPos[iInnerPrev].iElemNext = iInner;
290  else
291  m_aPos[iPos].iElemChild = iInner;
292  iInnerPrev = iInner;
293 
294  // Set offset to reflect child
295  m_aPos[iPos].nEndL = m_aPos[iInner].nEndR + 1;
296  }
 // Propagate a parse error from a nested element unchanged
297  if (iInner == -1)
298  return -1;
299 
300  // Look for left angle bracket of end tag
301  if (! x_FindChar(token.szDoc, m_aPos[iPos].nEndL, '<'))
302  return x_ParseError("End tag of %s element not found", csName.c_str());
303 
304  // Look through tokens of end tag
305  token.nNext = m_aPos[iPos].nEndL + 1;
306  int nTokenCount = 0;
307  while (x_FindToken(token))
308  {
309  ++nTokenCount;
310  if (! token.bIsString)
311  {
312  // Is first token not an end slash mark?
313  if (nTokenCount == 1 && m_csDoc[token.nL] != '/')
314  return x_ParseError("Expecting end tag of element %s", csName.c_str());
315 
316  else if (nTokenCount == 2 && ! token.Match(csName.c_str()))
317  return x_ParseError("End tag does not correspond to %s", csName.c_str());
318 
319  // Else is it a right angle bracket?
320  else if (m_csDoc[token.nL] == '>')
321  break;
322  }
323  }
324 
325  // Was a right angle bracket not found?
326  if (! token.szDoc[token.nL] || nTokenCount < 2)
327  return x_ParseError("End tag not completed for element %s", csName.c_str());
328  m_aPos[iPos].nEndR = token.nL;
329  }
330 
331  // Successfully parsed element (and contained elements)
332  return iPos;
333 }
334 
335 bool CMarkup::x_FindChar(const char* szDoc, int& nChar, char c)
336 {
337  // static function
338  const char* pChar = &szDoc[nChar];
339  while (*pChar && *pChar != c)
340  pChar += 1; //_tclen(pChar);
341  nChar = (int)(pChar - szDoc);
342  if (! *pChar)
343  return false;
344  /*
345  while (szDoc[nChar] && szDoc[nChar] != c)
346  nChar += _tclen(&szDoc[nChar]);
347  if (! szDoc[nChar])
348  return false;
349  */
350  return true;
351 }
352 
353 bool CMarkup::x_FindAny(const char* szDoc, int& nChar)
354 {
355  // Starting at nChar, find a non-whitespace char
356  // return false if no non-whitespace before end of document, nChar points to end
357  // otherwise return true and nChar points to non-whitespace char
358  while (szDoc[nChar] && strchr(" \t\n\r", szDoc[nChar]))
359  ++nChar;
360  return szDoc[nChar] != '\0';
361 }
362 
364 {
 // NOTE(review): the signature line (static bool x_FindToken(TokenPos&))
 // is absent from this listing.
 // On success token.nL / token.nR are the inclusive bounds of the token,
 // token.nNext is the index just past it, and token.bIsString tells
 // whether it was a quoted string (bounds then exclude the quotes).
365  // Starting at token.nNext, bypass whitespace and find the next token
366  // returns true on success, members of token point to token
367  // returns false on end of document, members point to end of document
368  const char* szDoc = token.szDoc;
369  int nChar = token.nNext;
370  token.bIsString = false;
371 
372  // By-pass leading whitespace
373  if (! x_FindAny(szDoc,nChar))
374  {
375  // No token was found before end of document
376  token.nL = nChar;
377  token.nR = nChar;
378  token.nNext = nChar;
379  return false;
380  }
381 
382  // Is it an opening quote?
383  char cFirstChar = szDoc[nChar];
384  if (cFirstChar == '\"' || cFirstChar == '\'')
385  {
386  token.bIsString = true;
387 
388  // Move past opening quote
389  ++nChar;
390  token.nL = nChar;
391 
392  // Look for closing quote
 // The result is ignored: an unterminated string runs to end of document
393  x_FindChar(token.szDoc, nChar, cFirstChar);
394 
395  // Set right to before closing quote
396  token.nR = nChar - 1;
397 
398  // Set nChar past closing quote unless at end of document
399  if (szDoc[nChar])
400  ++nChar;
401  }
402  else
403  {
404  // Go until special char or whitespace
405  token.nL = nChar;
406  while (szDoc[nChar] && ! strchr(" \t\n\r<>=\\/?!", szDoc[nChar]))
407  nChar += 1; //_tclen(&szDoc[nChar]);
408 
409  // Adjust end position if it is one special char
410  if (nChar == token.nL)
411  ++nChar; // it is a special char
412  token.nR = nChar - 1;
413  }
414 
415  // nNext points to one past last char of token
416  token.nNext = nChar;
417  return true;
418 }
419 
420 std::string CMarkup::x_GetToken(const CMarkup::TokenPos& token) const
421 {
422  // The token contains indexes into the document identifying a small substring
423  // Build the substring from those indexes and return it
424  if (token.nL > token.nR)
425  return "";
426  return Mid(m_csDoc, token.nL,
427  token.nR - token.nL + ((token.nR < (int)(m_csDoc.length())) ? 1 : 0));
428 }
429 
430 int CMarkup::x_FindElem(int iPosParent, int iPos, const char* szPath)
431 {
432  // If szPath is NULL or empty, go to next sibling element
433  // Otherwise go to next sibling element with matching path
434  //
435  if (iPos)
436  iPos = m_aPos[iPos].iElemNext;
437  else
438  iPos = m_aPos[iPosParent].iElemChild;
439 
440  // Finished here if szPath not specified
441  if (szPath == NULL || !szPath[0])
442  return iPos;
443 
444  // Search
445  TokenPos token(m_csDoc.c_str());
446  while (iPos)
447  {
448  // Compare tag name
449  token.nNext = m_aPos[iPos].nStartL + 1;
450  x_FindToken(token); // Locate tag name
451  if (token.Match(szPath))
452  return iPos;
453  iPos = m_aPos[iPos].iElemNext;
454  }
455  return 0;
456 }
457 
459 {
 // NOTE(review): the signature line (int x_ParseNode(TokenPos&)) is
 // absent from this listing.
 // Returns one of the MNT_* node type values, or 0 when no node is found,
 // an end tag is met, or the node is not terminated.
460  // Call this with token.nNext set to the start of the node
461  // This returns the node type and token.nNext set to the char after the node
462  // If the node is not found or an element, token.nR is not determined
463  int nTypeFound = 0;
464  const char* szDoc = token.szDoc;
465  token.nL = token.nNext;
466  if (szDoc[token.nL] == '<')
467  {
468  // Started with <, could be:
469  // <!--...--> comment
470  // <!DOCTYPE ...> dtd
471  // <?target ...?> processing instruction
472  // <![CDATA[...]]> cdata section
473  // <NAME ...> element
474  //
 // At least two more characters must follow the '<'
475  if (! szDoc[token.nL+1] || ! szDoc[token.nL+2])
476  return 0;
477  char cFirstChar = szDoc[token.nL+1];
 // Terminator to search for; stays NULL for DTDs and elements
478  const char* szEndOfNode = NULL;
479  if (cFirstChar == '?')
480  {
481  nTypeFound = MNT_PROCESSING_INSTRUCTION;
482  szEndOfNode = "?>";
483  }
484  else if (cFirstChar == '!')
485  {
486  char cSecondChar = szDoc[token.nL+2];
487  if (cSecondChar == '[')
488  {
489  nTypeFound = MNT_CDATA_SECTION;
490  szEndOfNode = "]]>";
491  }
492  else if (cSecondChar == '-')
493  {
494  nTypeFound = MNT_COMMENT;
495  szEndOfNode = "-->";
496  }
497  else
498  {
499  // Document type requires tokenizing because of strings and brackets
500  nTypeFound = 0;
501  int nBrackets = 0;
502  while (x_FindToken(token))
503  {
504  if (! token.bIsString)
505  {
506  char cChar = szDoc[token.nL];
507  if (cChar == '[')
508  ++nBrackets;
509  else if (cChar == ']')
510  --nBrackets;
511  else if (nBrackets == 0 && cChar == '>')
512  {
513  nTypeFound = MNT_DOCUMENT_TYPE;
514  break;
515  }
516  }
517  }
518  if (! nTypeFound)
519  return 0;
520  }
521  }
522  else if (cFirstChar == '/')
523  {
524  // End tag means no node found within parent element
525  return 0;
526  }
527  else
528  {
 // For an element token.nNext is left unchanged in this branch
529  nTypeFound = MNT_ELEMENT;
530  }
531 
532  // Search for end of node if not found yet
533  if (szEndOfNode)
534  {
535  const char* pEnd = strstr(&szDoc[token.nNext], szEndOfNode);
536  if (! pEnd)
537  return 0; // not well-formed
538  token.nNext = (int)(pEnd - szDoc) + (int)strlen(szEndOfNode);
539  }
540  }
541  else if (szDoc[token.nL])
542  {
543  // It is text or whitespace because it did not start with <
544  nTypeFound = MNT_WHITESPACE;
545  token.nNext = token.nL;
546  if (x_FindAny(szDoc,token.nNext))
547  {
548  if (szDoc[token.nNext] != '<')
549  {
550  nTypeFound = MNT_TEXT;
 // Text runs up to the next '<' or the end of the document
551  x_FindChar(szDoc, token.nNext, '<');
552  }
553  }
554  }
555  return nTypeFound;
556 }
557 
558 std::string CMarkup::x_GetTagName(int iPos) const
559 {
560  // Return the tag name at specified element
561  TokenPos token(m_csDoc.c_str());
562  token.nNext = m_aPos[iPos].nStartL + 1;
563  if (! iPos || ! x_FindToken(token))
564  return "";
565 
566  // Return substring of document
567  return x_GetToken(token);
568 }
569 
570 bool CMarkup::x_FindAttrib(CMarkup::TokenPos& token, const char* szAttrib) const
571 {
572  // If szAttrib is NULL find next attrib, otherwise find named attrib
573  // Return true if found
574  int nAttrib = 0;
575  for (int nCount = 0; x_FindToken(token); ++nCount)
576  {
577  if (! token.bIsString)
578  {
579  // Is it the right angle bracket?
580  char cChar = m_csDoc[token.nL];
581  if (cChar == '>' || cChar == '/' || cChar == '?')
582  break; // attrib not found
583 
584  // Equal sign
585  if (cChar == '=')
586  continue;
587 
588  // Potential attribute
589  if (! nAttrib && nCount)
590  {
591  // Attribute name search?
592  if (! szAttrib || ! szAttrib[0])
593  return true; // return with token at attrib name
594 
595  // Compare szAttrib
596  if (token.Match(szAttrib))
597  nAttrib = nCount;
598  }
599  }
600  else if (nAttrib && nCount == nAttrib + 2)
601  {
602  return true;
603  }
604  }
605 
606  // Not found
607  return false;
608 }
609 
610 std::string CMarkup::x_GetAttrib(int iPos, const char* szAttrib) const
611 {
612  // Return the value of the attrib
613  TokenPos token(m_csDoc.c_str());
614  if (iPos && m_nNodeType == MNT_ELEMENT)
615  token.nNext = m_aPos[iPos].nStartL + 1;
616  else
617  return "";
618 
619  if (szAttrib && x_FindAttrib(token, szAttrib))
620  return x_TextFromDoc(token.nL, token.nR - ((token.nR < (int)(m_csDoc.length())) ? 0 : 1));
621  return "";
622 }
623 
// Set attribute szAttrib to szValue on the element at iPos, replacing the
// existing value or appending a new name="value" pair to the start tag.
// Returns false when iPos is 0 or the current node is not an element.
// NOTE(review): szAttrib and szValue are used without null checks
// (szAttrib is appended to a std::string, szValue goes to x_TextToDoc);
// listing line 665 is also absent from this listing.
624 bool CMarkup::x_SetAttrib(int iPos, const char* szAttrib, const char* szValue)
625 {
626  // Set attribute in iPos element
627  TokenPos token(m_csDoc.c_str());
628  int nInsertAt;
629  if (iPos && m_nNodeType == MNT_ELEMENT)
630  {
631  token.nNext = m_aPos[iPos].nStartL + 1;
 // Default insertion point: before the '>' (or before '/>' when empty)
632  nInsertAt = m_aPos[iPos].nStartR - (m_aPos[iPos].IsEmptyElement()?1:0);
633  }
634  else
635  return false;
636 
637  // Create insertion text depending on whether attribute already exists
638  int nReplace = 0;
639  std::string csInsert;
640  if (x_FindAttrib(token, szAttrib))
641  {
642  // Replace value only
643  // Decision: for empty value leaving attrib="" instead of removing attrib
644  csInsert = x_TextToDoc(szValue, true);
645  nInsertAt = token.nL;
646  nReplace = token.nR-token.nL+1;
647  }
648  else
649  {
650  // Insert string name value pair
651  std::string csFormat;
652  csFormat = " ";
653  csFormat += szAttrib;
654  csFormat += "=\"";
655  csFormat += x_TextToDoc(szValue, true);
656  csFormat += "\"";
657  csInsert = csFormat;
658  }
659 
660  x_DocChange(nInsertAt, nReplace, csInsert);
 // Shift this element's own offsets, then every offset that follows it
661  int nAdjust = (int)csInsert.length() - nReplace;
662  m_aPos[iPos].nStartR += nAdjust;
663  m_aPos[iPos].AdjustEnd(nAdjust);
664  x_Adjust(iPos, nAdjust);
666  return true;
667 }
668 
669 std::string CMarkup::x_GetData(int iPos) const
670 {
671 
672  // Return a string representing data between start and end tag
673  // Return empty string if there are any children elements
674  if (! m_aPos[iPos].iElemChild && ! m_aPos[iPos].IsEmptyElement())
675  {
676  // See if it is a CDATA section
677  const char* szDoc = (const char*)(m_csDoc.c_str());
678  int nChar = m_aPos[iPos].nStartR + 1;
679  if (x_FindAny(szDoc, nChar) && szDoc[nChar] == '<'
680  && nChar + 11 < m_aPos[iPos].nEndL
681  && strncmp(&szDoc[nChar], "<![CDATA[", 9) == 0)
682  {
683  nChar += 9;
684  int nEndCDATA = (int)m_csDoc.find("]]>", nChar);
685  if (nEndCDATA != -1 && nEndCDATA < m_aPos[iPos].nEndL)
686  {
687  return Mid(m_csDoc, nChar, nEndCDATA - nChar);
688  }
689  }
690  return x_TextFromDoc(m_aPos[iPos].nStartR+1, m_aPos[iPos].nEndL-1);
691  }
692  return "";
693 }
694 
695 std::string CMarkup::x_TextToDoc(const char* szText, bool bAttrib) const
696 {
697  // Convert text as seen outside XML document to XML friendly
698  // replacing special characters with ampersand escape codes
699  // E.g. convert "6>7" to "6&gt;7"
700  //
701  // &lt; less than
702  // &amp; ampersand
703  // &gt; greater than
704  //
705  // and for attributes:
706  //
707  // &apos; apostrophe or single quote
708  // &quot; double quote
709  //
710  static const char* szaReplace[] = { "&lt;", "&amp;", "&gt;", "&apos;", "&quot;" };
711  const char* pFind = bAttrib ? "<&>\'\"" : "<&>";
712  std::string csText;
713  const char* pSource = szText;
714  int nDestSize = (int)strlen(pSource);
715  nDestSize += nDestSize / 10 + 7;
716  char* pDest = GetBuffer(csText, nDestSize);
717  int nLen = 0;
718  char cSource = *pSource;
719  const char* pFound;
720  while (cSource)
721  {
722  if (nLen > nDestSize - 6)
723  {
724  ReleaseBuffer(csText, nLen);
725  nDestSize *= 2;
726  pDest = GetBuffer(csText, nDestSize);
727  }
728  if ((pFound = strchr(pFind,cSource)) != NULL)
729  {
730  pFound = szaReplace[pFound-pFind];
731 #ifdef _WIN32
732  strcpy_s(&pDest[nLen], nDestSize, pFound);
733 #else
734  strncpy(&pDest[nLen], pFound, nDestSize);
735 #endif
736  nLen += (int)strlen(pFound);
737  }
738  else
739  {
740  pDest[nLen] = *pSource;
741  nLen += 1; //_tclen(pSource);
742  }
743  pSource += 1; //_tclen(pSource);
744  cSource = *pSource;
745  }
746  ReleaseBuffer(csText, nLen);
747  return csText;
748 }
749 
750 std::string CMarkup::x_TextFromDoc(int nLeft, int nRight) const
751 {
752  // Convert XML friendly text to text as seen outside XML document
753  // ampersand escape codes replaced with special characters e.g. convert "6&gt;7" to "6>7"
754  // Conveniently the result is always the same or shorter in byte length
755  //
756  static const char* szaCode[] = { "lt;", "amp;", "gt;", "apos;", "quot;" };
757  static int anCodeLen[] = { 3,4,3,5,5 };
758  static const char* szSymbol = "<&>\'\"";
759  std::string csText;
760  const char* pSource = m_csDoc.c_str();
761  int nDestSize = nRight - nLeft + 1;
762  char* pDest = GetBuffer(csText, nDestSize);
763  int nLen = 0;
764  int nCharLen;
765  int nChar = nLeft;
766  while (nChar <= nRight)
767  {
768  if (pSource[nChar] == '&')
769  {
770  // Look for matching &code;
771  bool bCodeConverted = false;
772  for (int nMatch = 0; nMatch < 5; ++nMatch)
773  {
774  if (nChar <= nRight - anCodeLen[nMatch]
775  && strncmp(szaCode[nMatch],&pSource[nChar+1],anCodeLen[nMatch]) == 0)
776  {
777  // Insert symbol and increment index past ampersand semi-colon
778  pDest[nLen++] = szSymbol[nMatch];
779  nChar += anCodeLen[nMatch] + 1;
780  bCodeConverted = true;
781  break;
782  }
783  }
784 
785  // If the code is not converted, leave it as is
786  if (! bCodeConverted)
787  {
788  pDest[nLen++] = '&';
789  ++nChar;
790  }
791  }
792  else // not &
793  {
794  nCharLen = 1; //_tclen(&pSource[nChar]);
795  pDest[nLen] = pSource[nChar];
796  nLen += nCharLen;
797  nChar += nCharLen;
798  }
799  }
800  ReleaseBuffer(csText, nLen);
801  return csText;
802 }
803 
804 void CMarkup::x_DocChange(int nLeft, int nReplace, const std::string& csInsert)
805 {
806  // Insert csInsert int m_csDoc at nLeft replacing nReplace chars
807  // Do this with only one buffer reallocation if it grows
808  //
809  int nDocLength = (int)m_csDoc.length();
810  int nInsLength = (int)csInsert.length();
811 
812  // Make sure nLeft and nReplace are within bounds
813  nLeft = std::max(0, std::min(nLeft, nDocLength));
814  nReplace = std::max(0, std::min(nReplace, nDocLength-nLeft));
815 
816  // Get pointer to buffer with enough room
817  int nNewLength = nInsLength + nDocLength - nReplace;
818  int nBufferLen = nNewLength;
819  char* pDoc = GetBuffer(m_csDoc, nBufferLen);
820 
821  // Move part of old doc that goes after insert
822  if (nLeft+nReplace < nDocLength)
823  memmove(&pDoc[nLeft+nInsLength], &pDoc[nLeft+nReplace], (nDocLength-nLeft-nReplace)*sizeof(char));
824 
825  // Copy insert
826  memcpy(&pDoc[nLeft], csInsert.c_str(), nInsLength*sizeof(char));
827 
828  // Release
829  ReleaseBuffer(m_csDoc, nNewLength);
830 }
831 
// Shift the stored document offsets of every element affected by an
// insertion or removal of nShift characters at or inside element iPos.
// iPos:      element where the change happened.
// nShift:    number of characters added (positive) or removed (negative).
// bAfterPos: when true the children of iPos are skipped.
// NOTE(review): when the traversal runs out (iPos becomes 0) the final
// iteration still applies the shift to m_aPos[0] - presumably intended
// for the virtual root; confirm.
832 void CMarkup::x_Adjust(int iPos, int nShift, bool bAfterPos)
833 {
834  // Loop through affected elements and adjust indexes
835  // Algorithm:
836  // 1. update children unless bAfterPos
837  // (if no children or bAfterPos is true, end tag of iPos not affected)
838  // 2. update next siblings and their children
839  // 3. go up until there is a next sibling of a parent and update end tags
840  // 4. step 2
841  int iPosTop = m_aPos[iPos].iElemParent;
842  bool bPosFirst = bAfterPos; // mark as first to skip its children
843  while (iPos)
844  {
845  // Were we at containing parent of affected position?
846  bool bPosTop = false;
847  if (iPos == iPosTop)
848  {
849  // Move iPosTop up one towards root
850  iPosTop = m_aPos[iPos].iElemParent;
851  bPosTop = true;
852  }
853 
854  // Traverse to the next update position
855  if (! bPosTop && ! bPosFirst && m_aPos[iPos].iElemChild)
856  {
857  // Depth first
858  iPos = m_aPos[iPos].iElemChild;
859  }
860  else if (m_aPos[iPos].iElemNext)
861  {
862  iPos = m_aPos[iPos].iElemNext;
863  }
864  else
865  {
866  // Look for next sibling of a parent of iPos
867  // When going back up, parents have already been done except iPosTop
868  while ((iPos=m_aPos[iPos].iElemParent) != 0 && iPos != iPosTop)
869  if (m_aPos[iPos].iElemNext)
870  {
871  iPos = m_aPos[iPos].iElemNext;
872  break;
873  }
874  }
875  bPosFirst = false;
876 
877  // Shift indexes at iPos
 // A containing parent (iPosTop) only has its end offsets moved
878  if (iPos != iPosTop)
879  m_aPos[iPos].AdjustStart(nShift);
880  m_aPos[iPos].AdjustEnd(nShift);
881  }
882 }
883 
// Work out where a new element or node should be inserted.
// iPosParent: element that will contain the new node (0 = document level).
// iPosRel:    in: element the new node is relative to (0 = none);
//             out: element the new node must be linked after (0 = first).
// nOffset:    in: offset of a non-element node when nLength is non-zero;
//             out: document offset at which to insert.
// nLength:    length of the non-element node at nOffset, or 0.
// nFlags:     bit 0 = insert before instead of after,
//             bit 1 = honor whitespace (do not skip ahead to the next '<').
884 void CMarkup::x_LocateNew(int iPosParent, int& iPosRel, int& nOffset, int nLength, int nFlags)
885 {
886  // Determine where to insert new element or node
887  //
888  bool bInsert = (nFlags&1)?true:false;
889  bool bHonorWhitespace = (nFlags&2)?true:false;
890 
891  std::string::size_type nStartL;
892  if (nLength)
893  {
894  // Located at a non-element node
895  if (bInsert)
896  nStartL = nOffset;
897  else
898  nStartL = nOffset + nLength;
899  }
900  else if (iPosRel)
901  {
902  // Located at an element
903  if (bInsert) // precede iPosRel
904  nStartL = m_aPos[iPosRel].nStartL;
905  else // follow iPosRel
906  nStartL = m_aPos[iPosRel].nEndR + 1;
907  }
908  else if (! iPosParent)
909  {
910  // Outside of all elements
911  if (bInsert)
912  nStartL = 0;
913  else
914  nStartL = m_csDoc.length();
915  }
916  else if (m_aPos[iPosParent].IsEmptyElement())
917  {
918  // Parent has no separate end tag, so split empty element
919  nStartL = m_aPos[iPosParent].nStartR;
920  }
921  else
922  {
923  if (bInsert) // after start tag
924  nStartL = m_aPos[iPosParent].nStartR + 1;
925  else // before end tag
926  nStartL = m_aPos[iPosParent].nEndL;
927  }
928 
929  // Go up to start of next node, unless its splitting an empty element
930  if (! bHonorWhitespace && ! m_aPos[iPosParent].IsEmptyElement())
931  {
932  const char* szDoc = (const char*)m_csDoc.c_str();
933  int nChar = (int)nStartL;
 // Skip whitespace only when it is followed by a tag or the document end
934  if (! x_FindAny(szDoc,nChar) || szDoc[nChar] == '<')
935  nStartL = nChar;
936  }
937 
938  // Determine iPosBefore
939  int iPosBefore = 0;
940  if (iPosRel)
941  {
942  if (bInsert)
943  {
944  // Is iPosRel past first sibling?
945  int iPosPrev = m_aPos[iPosParent].iElemChild;
946  if (iPosPrev != iPosRel)
947  {
948  // Find previous sibling of iPosRel
949  while (m_aPos[iPosPrev].iElemNext != iPosRel)
950  iPosPrev = m_aPos[iPosPrev].iElemNext;
951  iPosBefore = iPosPrev;
952  }
953  }
954  else
955  {
956  iPosBefore = iPosRel;
957  }
958  }
959  else if (m_aPos[iPosParent].iElemChild)
960  {
961  if (! bInsert)
962  {
963  // Find last element under iPosParent
964  int iPosLast = m_aPos[iPosParent].iElemChild;
965  int iPosNext = iPosLast;
966  while (iPosNext)
967  {
968  iPosLast = iPosNext;
969  iPosNext = m_aPos[iPosNext].iElemNext;
970  }
971  iPosBefore = iPosLast;
972  }
973  }
974 
 // Hand the results back through the reference parameters
975  nOffset = (int)nStartL;
976  iPosRel = iPosBefore;
977 }
978 
// Create a new element, insert its text into the document (indented by
// nesting level and followed by "\r\n") and link it into the position array.
// szName:    tag name of the new element.
// szValue:   element content; NULL or empty creates an empty <NAME/> element.
// bInsert:   true to place the element before the current position,
//            false to place it after.
// bAddChild: true to add under the main position, false to add as a
//            sibling at the main level.
// Returns false when a child is requested without a main position, or when
// a root element is requested but the document already has one.
// NOTE(review): szName is passed to strlen without a null check.
// NOTE(review): nIndentChars (nesting level * mnIndent) indexes mtIndent
// without a bounds check, and nIndentChars - mnIndent is negative when the
// nesting level is 0 - confirm callers cannot reach those cases.
979 bool CMarkup::x_AddElem(const char* szName, const char* szValue, bool bInsert, bool bAddChild)
980 {
981  if (bAddChild)
982  {
983  // Adding a child element under main position
984  if (! m_iPos)
985  return false;
986  }
987  else if (m_iPosParent == 0)
988  {
989  // Adding root element
990  if (IsWellFormed())
991  return false;
992 
993 
994  // Locate after any version and DTD
995  m_aPos[0].nEndL = (int)m_csDoc.length();
996  }
997 
998  // Locate where to add element relative to current node
999  int iPosParent, iPosBefore, nOffset = 0, nLength = 0;
1000  if (bAddChild)
1001  {
1002  iPosParent = m_iPos;
1003  iPosBefore = m_iPosChild;
1004  }
1005  else
1006  {
1007  iPosParent = m_iPosParent;
1008  iPosBefore = m_iPos;
1009  }
1010  int nFlags = bInsert?1:0;
1011  x_LocateNew(iPosParent, iPosBefore, nOffset, nLength, nFlags);
1012  // LocateNew: in case of an empty parent it finds the end of the start tag (sort of)
1013  // in case of a non-empty parent it finds the char before the start of the end tag.
1014 
1015 
1016  // Find out the indent we need:
1017  int nTopParent = iPosParent;
1018  int nLevel = 0;
1019  while (nTopParent)
1020  {
1021  nTopParent = m_aPos[nTopParent].iElemParent;
1022  nLevel++;
1023  }
1024  int nIndentChars = nLevel * mnIndent;
 // Temporarily terminate the space buffer so it reads as the indent string
1025  mtIndent[ nIndentChars ] = 0;
1026 
1027 
1028  bool bEmptyParent = m_aPos[iPosParent].IsEmptyElement();
1029  if (bEmptyParent || m_aPos[iPosParent].nStartR + 1 == m_aPos[iPosParent].nEndL)
1030  {
1031  nOffset += 2;
1032  }
1033  else
1034  {
 // Back up over spaces that precede the insertion point
1035  if ((nOffset < (int)(m_csDoc.length())) && (0 < nOffset) &&
1036  (' ' == m_csDoc[nOffset-1]))
1037  {
1038  while ((0 < nOffset) && (' ' == m_csDoc[nOffset-1]))
1039  --nOffset;
1040  }
1041  }
1042 
1043  // Create element and modify positions of affected elements
1044  // If no szValue is specified, an empty element is created
1045  // i.e. either <NAME>value</NAME> or <NAME/>
1046  int iPos = x_GetFreePos();
1047  m_aPos[iPos].nStartL = nOffset + nIndentChars;
1048 
1049  // Set links
1050  m_aPos[iPos].iElemParent = iPosParent;
1051  m_aPos[iPos].iElemChild = 0;
1052  m_aPos[iPos].iElemNext = 0;
1053  if (iPosBefore)
1054  {
1055  // Link in after iPosBefore
1056  m_aPos[iPos].iElemNext = m_aPos[iPosBefore].iElemNext;
1057  m_aPos[iPosBefore].iElemNext = iPos;
1058  }
1059  else
1060  {
1061  // First child
1062  m_aPos[iPos].iElemNext = m_aPos[iPosParent].iElemChild;
1063  m_aPos[iPosParent].iElemChild = iPos;
1064  }
1065 
1066  // Create string for insert
1067  std::string csInsert;
1068  int nLenName = (int)strlen(szName);
1069  int nLenValue = szValue ? (int)strlen(szValue) : 0;
1070  if (! nLenValue)
1071  {
1072  // <NAME/> empty element
1073  csInsert = mtIndent;
1074  csInsert += "<";
1075  csInsert += szName;
1076  csInsert += "/>\r\n";
1077  m_aPos[iPos].nStartR = m_aPos[iPos].nStartL + nLenName + 2;
1078  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR - 1;
1079  m_aPos[iPos].nEndR = m_aPos[iPos].nEndL + 1;
1080  }
1081  else
1082  {
1083  // <NAME>value</NAME>
1084  std::string csValue = x_TextToDoc(szValue);
1085  nLenValue = (int)csValue.length();
1086  csInsert = mtIndent;
1087  csInsert += "<";
1088  csInsert += szName;
1089  csInsert += ">";
1090  csInsert += csValue;
1091  csInsert += "</";
1092  csInsert += szName;
1093  csInsert += ">\r\n";
1094  m_aPos[iPos].nStartR = m_aPos[iPos].nStartL + nLenName + 1;
1095  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + nLenValue + 1;
1096  m_aPos[iPos].nEndR = m_aPos[iPos].nEndL + nLenName + 2;
1097  }
 // Restore the space that was overwritten by the terminator above
1098  mtIndent[ nIndentChars ] = ' ';
1099 
1100 
1101 
1102 
1103  // Insert
1104  int nReplace = 0, nLeft = m_aPos[iPos].nStartL;
1105  if (bEmptyParent)
1106  {
1107  std::string csParentTagName = x_GetTagName(iPosParent);
1108  std::string csFormat;
1109  csFormat = ">\r\n";
1110  csFormat += csInsert;
 // The parent's end tag is indented one level less than the new element
1111  mtIndent[ nIndentChars - mnIndent ] = 0;
1112  csFormat += mtIndent;
1113  mtIndent[ nIndentChars - mnIndent ] = ' ';
1114  csFormat += "</";
1115  csFormat += csParentTagName;
1116  csInsert = csFormat;
1117  nLeft = m_aPos[iPosParent].nStartR - 1;
1118  nReplace = 1;
1119  // x_Adjust is going to update all affected indexes by one amount
1120  // This will satisfy all except the empty parent
1121  // Here we pre-adjust for the empty parent
1122  // The empty tag slash is removed
1123  m_aPos[iPosParent].nStartR -= 1;
1124  // For the newly created end tag, see the following example:
1125  // <A/> (len 4) becomes <A><B/></A> (len 11)
1126  // In x_Adjust everything will be adjusted 11 - 4 = 7
1127  // But the nEndL of element A should only be adjusted 5
1128  m_aPos[iPosParent].nEndL -= (int)(csParentTagName.length() + 1);
1129  }
1130  else if (m_aPos[iPosParent].nStartR + 1 == m_aPos[iPosParent].nEndL)
1131  {
1132  // Empty parent, but with an end tag following right after.
1133  csInsert = "\r\n" + csInsert;
1134  mtIndent[ nIndentChars - mnIndent ] = 0;
1135  csInsert += mtIndent;
1136  mtIndent[ nIndentChars - mnIndent ] = ' ';
1137  nLeft = m_aPos[iPosParent].nStartR + 1;
1138  }
1139  else
1140  {
 // nStartL already includes the indent; the text goes in before it
1141  nLeft -= nIndentChars;
1142  }
1143 
1144  x_DocChange(nLeft, nReplace, csInsert);
1145  x_Adjust(iPos, (int)csInsert.length() - nReplace);
1146 
 // Make the new element the current child or main position
1147  if (bAddChild)
1148  x_SetPos(m_iPosParent, iPosParent, iPos);
1149  else
1150  x_SetPos(iPosParent, iPos, 0);
1151  return true;
1152 }
1153 
1154 std::string CMarkup::Format(const char *fmt, ...)
1155 {
1156  using std::string;
1157  using std::vector;
1158 
1159  string retStr("");
1160 
1161  if (NULL != fmt)
1162  {
1163  va_list marker;
1164 
1165  // initialize variable arguments
1166  va_start(marker, fmt);
1167 
1168  // Get formatted string length adding one for NULL
1169 #ifdef _WIN32
1170  size_t len = _vscprintf(fmt, marker) + 1;
1171 #else
1172  va_list argcopy;
1173  va_copy(argcopy, marker);
1174  auto len = vsnprintf(NULL, 0, fmt, marker) + 1;
1175  va_end(argcopy);
1176 #endif
1177 
1178  // Create a char vector to hold the formatted string.
1179  vector<char> buffer(len, '\0');
1180 #ifdef _WIN32
1181  int nWritten = _vsnprintf_s(&buffer[0], buffer.size(), len, fmt, marker);
1182 #else
1183  int nWritten = vsnprintf(&buffer[0], len, fmt, marker);
1184 #endif
1185  if (nWritten > 0)
1186  {
1187  retStr = &buffer[0];
1188  }
1189 
1190  // Reset variable arguments
1191  va_end(marker);
1192  }
1193 
1194  return retStr;
1195 }
1196 
1197 std::string CMarkup::Mid(const std::string &tStr, int nFirst) const
1198 {
1199  return Mid(tStr, nFirst, (int)tStr.length() - nFirst);
1200 }
1201 
1202 std::string CMarkup::Mid(const std::string &tStr, int nFirst, int nCount) const
1203 {
1204  if (nFirst < 0)
1205  {
1206  nFirst = 0;
1207  }
1208  if (nCount < 0)
1209  {
1210  nCount = 0;
1211  }
1212 
1213  int nSize = static_cast<int>(tStr.size());
1214 
1215  if (nFirst + nCount > nSize)
1216  {
1217  nCount = nSize - nFirst;
1218  }
1219 
1220  if (nFirst > nSize)
1221  {
1222  std::string tStrEmpty;
1223  return tStrEmpty;
1224  }
1225 
1226  assert(nFirst >= 0);
1227  assert(nFirst + nCount <= nSize);
1228 
1229  return tStr.substr(nFirst, nCount);
1230 }
1231 
1232 char* CMarkup::GetBuffer(std::string &tStr, int nMinLen) const
1233 {
1234  if (static_cast<int>(tStr.size()) < nMinLen)
1235  {
1236  tStr.resize(nMinLen);
1237  }
1238 
1239  return const_cast<char*>(tStr.c_str()); //tStr.empty() ? const_cast<char*>(tStr.c_str()) : &(tStr.at(0));
1240 }
1241 
1242 void CMarkup::ReleaseBuffer(std::string &tStr, int nNewLen) const
1243 {
1244  tStr.resize(nNewLen > -1 ? nNewLen : strlen(tStr.c_str()));
1245 }
1246 
1247 bool CMarkup::TokenPos::Match(const char* szName) const
1248 {
1249  int nLen = nR - nL + 1;
1250  return ((strncmp(&szDoc[nL], szName, nLen) == 0)
1251  && (szName[nLen] == '\0' || strchr(" =/[", szName[nLen])));
1252 }
void SetIndent(int nIndent=4)
Definition: Markup.cpp:26
void x_Adjust(int iPos, int nShift, bool bAfterPos=false)
Definition: Markup.cpp:832
static bool x_FindAny(const char *szDoc, int &nChar)
Definition: Markup.cpp:353
void operator=(const CMarkup &markup)
Definition: Markup.cpp:31
static bool x_FindToken(TokenPos &token)
Definition: Markup.cpp:363
int m_iPosChild
Definition: Markup.h:111
void x_DocChange(int nLeft, int nReplace, const std::string &csInsert)
Definition: Markup.cpp:804
#define MARKUP_SETDEBUGSTATE
Definition: Markup.h:20
void x_SetPos(int iPosParent, int iPos, int iPosChild)
Definition: Markup.h:128
static bool x_FindChar(const char *szDoc, int &nChar, char c)
Definition: Markup.cpp:335
bool x_AddElem(const char *szName, const char *szValue, bool bInsert, bool bAddChild)
Definition: Markup.cpp:979
bool OutOfElem()
Definition: Markup.cpp:165
int m_iPos
Definition: Markup.h:110
int x_ReleasePos()
Definition: Markup.cpp:191
bool SetDoc(const char *szDoc)
Definition: Markup.cpp:44
Definition: Markup.h:23
int x_FindElem(int iPosParent, int iPos, const char *szPath)
Definition: Markup.cpp:430
int mnIndent
Definition: Markup.h:161
std::string x_GetAttrib(int iPos, const char *szAttrib) const
Definition: Markup.cpp:610
const char * szDoc
Definition: Markup.h:124
std::string m_csDoc
Definition: Markup.h:83
int x_ParseElem(int iPos)
Definition: Markup.cpp:211
std::string x_GetToken(const TokenPos &token) const
Definition: Markup.cpp:420
std::string x_TextToDoc(const char *szText, bool bAttrib=false) const
Definition: Markup.cpp:695
bool x_SetAttrib(int iPos, const char *szAttrib, const char *szValue)
Definition: Markup.cpp:624
int m_iPosParent
Definition: Markup.h:109
static std::string Format(const char *fmt,...)
Definition: Markup.cpp:1154
int x_GetFreePos()
Definition: Markup.cpp:180
char mtIndent[1000]
Definition: Markup.h:160
int x_ParseNode(TokenPos &token)
Definition: Markup.cpp:458
std::string m_csError
Definition: Markup.h:84
std::vector< ElemPos > m_aPos
Definition: Markup.h:108
int m_iPosFree
Definition: Markup.h:112
std::string x_GetTagName(int iPos) const
Definition: Markup.cpp:558
bool FindChildElem(const char *szName=NULL)
Definition: Markup.cpp:114
std::string GetTagName() const
Definition: Markup.cpp:136
std::string x_GetData(int iPos) const
Definition: Markup.cpp:669
bool x_FindAttrib(TokenPos &token, const char *szAttrib=NULL) const
Definition: Markup.cpp:570
bool Match(const char *szName) const
Definition: Markup.cpp:1247
int x_ParseError(const char *szError, const char *szName=NULL)
Definition: Markup.cpp:201
bool IntoElem()
Definition: Markup.cpp:147
void ResetPos()
Definition: Markup.h:53
bool FindElem(const char *szName=NULL)
Definition: Markup.cpp:97
bool IsWellFormed()
Definition: Markup.cpp:90
void x_LocateNew(int iPosParent, int &iPosRel, int &nOffset, int nLength, int nFlags)
Definition: Markup.cpp:884
int m_nNodeType
Definition: Markup.h:113
std::string x_TextFromDoc(int nLeft, int nRight) const
Definition: Markup.cpp:750