JsonCpp project page JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1#include <json/reader.h>
2#include <json/value.h>
3#include <utility>
4#include <cstdio>
5#include <cassert>
6#include <cstring>
7#include <iostream>
8#include <stdexcept>
9
10#if _MSC_VER >= 1400 // VC++ 8.0
11#pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
12#endif
13
14namespace Json {
15
16// Implementation of class Features
17// ////////////////////////////////
18
20 : allowComments_( true )
21 , strictRoot_( false )
22{
23}
24
25
28{
29 return Features();
30}
31
32
35{
36 Features features;
37 features.allowComments_ = false;
38 features.strictRoot_ = true;
39 return features;
40}
41
42// Implementation of class Reader
43// ////////////////////////////////
44
45
46static inline bool
48{
49 return c == c1 || c == c2 || c == c3 || c == c4;
50}
51
52static inline bool
54{
55 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
56}
57
58
59static bool
62{
63 for ( ;begin < end; ++begin )
64 if ( *begin == '\n' || *begin == '\r' )
65 return true;
66 return false;
67}
68
// Encodes the code point cp as a UTF-8 byte sequence of one to four bytes.
// Values above 0x10FFFF produce an empty string.
static std::string codePointToUTF8(unsigned int cp)
{
   std::string utf8;

   if ( cp <= 0x7f )
   {
      // single byte: 0xxxxxxx
      utf8 += static_cast<char>( cp );
      return utf8;
   }

   // Every multi-byte form ends with continuation bytes 10xxxxxx that
   // carry six payload bits each; only the lead byte differs.
   const char tail0 = static_cast<char>( 0x80 | (0x3f & cp) );
   const char tail1 = static_cast<char>( 0x80 | (0x3f & (cp >> 6)) );
   const char tail2 = static_cast<char>( 0x80 | (0x3f & (cp >> 12)) );

   if ( cp <= 0x7FF )
   {
      // 110xxxxx 10xxxxxx
      utf8 += static_cast<char>( 0xC0 | (0x1f & (cp >> 6)) );
      utf8 += tail0;
   }
   else if ( cp <= 0xFFFF )
   {
      // 1110xxxx 10xxxxxx 10xxxxxx
      utf8 += static_cast<char>( 0xE0 | (0xf & (cp >> 12)) );
      utf8 += tail1;
      utf8 += tail0;
   }
   else if ( cp <= 0x10FFFF )
   {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      utf8 += static_cast<char>( 0xF0 | (0x7 & (cp >> 18)) );
      utf8 += tail2;
      utf8 += tail1;
      utf8 += tail0;
   }

   return utf8;
}
104
105
106// Class Reader
107// //////////////////////////////////////////////////////////////////
108
110 : features_( Features::all() )
111{
112}
113
114
115Reader::Reader( const Features &features )
116 : features_( features )
117{
118}
119
120
121bool
122Reader::parse( const std::string &document,
123 Value &root,
124 bool collectComments )
125{
126 document_ = document;
127 const char *begin = document_.c_str();
128 const char *end = begin + document_.length();
129 return parse( begin, end, root, collectComments );
130}
131
132
133bool
134Reader::parse( std::istream& sin,
135 Value &root,
136 bool collectComments )
137{
138 //std::istream_iterator<char> begin(sin);
139 //std::istream_iterator<char> end;
140 // Those would allow streamed input from a file, if parse() were a
141 // template function.
142
143 // Since std::string is reference-counted, this at least does not
144 // create an extra copy.
145 std::string doc;
146 std::getline(sin, doc, (char)EOF);
147 return parse( doc, root, collectComments );
148}
149
150bool
151Reader::parse( const char *beginDoc, const char *endDoc,
152 Value &root,
153 bool collectComments )
154{
155 if ( !features_.allowComments_ )
156 {
157 collectComments = false;
158 }
159
160 begin_ = beginDoc;
161 end_ = endDoc;
162 collectComments_ = collectComments;
163 current_ = begin_;
164 lastValueEnd_ = 0;
165 lastValue_ = 0;
166 commentsBefore_ = "";
167 errors_.clear();
168 while ( !nodes_.empty() )
169 nodes_.pop();
170 nodes_.push( &root );
171
172 bool successful = readValue();
173 Token token;
174 skipCommentTokens( token );
175 if ( collectComments_ && !commentsBefore_.empty() )
176 root.setComment( commentsBefore_, commentAfter );
177 if ( features_.strictRoot_ )
178 {
179 if ( !root.isArray() && !root.isObject() )
180 {
181 // Set error location to start of doc, ideally should be first token found in doc
182 token.type_ = tokenError;
183 token.start_ = beginDoc;
184 token.end_ = endDoc;
185 addError( "A valid JSON document must be either an array or an object value.",
186 token );
187 return false;
188 }
189 }
190 return successful;
191}
192
193
194bool
195Reader::readValue()
196{
197 Token token;
198 skipCommentTokens( token );
199 bool successful = true;
200
201 if ( collectComments_ && !commentsBefore_.empty() )
202 {
203 currentValue().setComment( commentsBefore_, commentBefore );
204 commentsBefore_ = "";
205 }
206
207
208 switch ( token.type_ )
209 {
210 case tokenObjectBegin:
211 successful = readObject( token );
212 break;
213 case tokenArrayBegin:
214 successful = readArray( token );
215 break;
216 case tokenNumber:
217 successful = decodeNumber( token );
218 break;
219 case tokenString:
220 successful = decodeString( token );
221 break;
222 case tokenTrue:
223 currentValue() = true;
224 break;
225 case tokenFalse:
226 currentValue() = false;
227 break;
228 case tokenNull:
229 currentValue() = Value();
230 break;
231 default:
232 return addError( "Syntax error: value, object or array expected.", token );
233 }
234
235 if ( collectComments_ )
236 {
237 lastValueEnd_ = current_;
238 lastValue_ = &currentValue();
239 }
240
241 return successful;
242}
243
244
245void
246Reader::skipCommentTokens( Token &token )
247{
248 if ( features_.allowComments_ )
249 {
250 do
251 {
252 readToken( token );
253 }
254 while ( token.type_ == tokenComment );
255 }
256 else
257 {
258 readToken( token );
259 }
260}
261
262
263bool
264Reader::expectToken( TokenType type, Token &token, const char *message )
265{
266 readToken( token );
267 if ( token.type_ != type )
268 return addError( message, token );
269 return true;
270}
271
272
273bool
274Reader::readToken( Token &token )
275{
276 skipSpaces();
277 token.start_ = current_;
278 Char c = getNextChar();
279 bool ok = true;
280 switch ( c )
281 {
282 case '{':
283 token.type_ = tokenObjectBegin;
284 break;
285 case '}':
286 token.type_ = tokenObjectEnd;
287 break;
288 case '[':
289 token.type_ = tokenArrayBegin;
290 break;
291 case ']':
292 token.type_ = tokenArrayEnd;
293 break;
294 case '"':
295 token.type_ = tokenString;
296 ok = readString();
297 break;
298 case '/':
299 token.type_ = tokenComment;
300 ok = readComment();
301 break;
302 case '0':
303 case '1':
304 case '2':
305 case '3':
306 case '4':
307 case '5':
308 case '6':
309 case '7':
310 case '8':
311 case '9':
312 case '-':
313 token.type_ = tokenNumber;
314 readNumber();
315 break;
316 case 't':
317 token.type_ = tokenTrue;
318 ok = match( "rue", 3 );
319 break;
320 case 'f':
321 token.type_ = tokenFalse;
322 ok = match( "alse", 4 );
323 break;
324 case 'n':
325 token.type_ = tokenNull;
326 ok = match( "ull", 3 );
327 break;
328 case ',':
329 token.type_ = tokenArraySeparator;
330 break;
331 case ':':
332 token.type_ = tokenMemberSeparator;
333 break;
334 case 0:
335 token.type_ = tokenEndOfStream;
336 break;
337 default:
338 ok = false;
339 break;
340 }
341 if ( !ok )
342 token.type_ = tokenError;
343 token.end_ = current_;
344 return true;
345}
346
347
348void
349Reader::skipSpaces()
350{
351 while ( current_ != end_ )
352 {
353 Char c = *current_;
354 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
355 ++current_;
356 else
357 break;
358 }
359}
360
361
362bool
363Reader::match( Location pattern,
364 int patternLength )
365{
366 if ( end_ - current_ < patternLength )
367 return false;
368 int index = patternLength;
369 while ( index-- )
370 if ( current_[index] != pattern[index] )
371 return false;
372 current_ += patternLength;
373 return true;
374}
375
376
377bool
378Reader::readComment()
379{
380 Location commentBegin = current_ - 1;
381 Char c = getNextChar();
382 bool successful = false;
383 if ( c == '*' )
384 successful = readCStyleComment();
385 else if ( c == '/' )
386 successful = readCppStyleComment();
387 if ( !successful )
388 return false;
389
390 if ( collectComments_ )
391 {
393 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) )
394 {
395 if ( c != '*' || !containsNewLine( commentBegin, current_ ) )
396 placement = commentAfterOnSameLine;
397 }
398
399 addComment( commentBegin, current_, placement );
400 }
401 return true;
402}
403
404
405void
406Reader::addComment( Location begin,
407 Location end,
408 CommentPlacement placement )
409{
410 assert( collectComments_ );
411 if ( placement == commentAfterOnSameLine )
412 {
413 assert( lastValue_ != 0 );
414 lastValue_->setComment( std::string( begin, end ), placement );
415 }
416 else
417 {
418 if ( !commentsBefore_.empty() )
419 commentsBefore_ += "\n";
420 commentsBefore_ += std::string( begin, end );
421 }
422}
423
424
425bool
426Reader::readCStyleComment()
427{
428 while ( current_ != end_ )
429 {
430 Char c = getNextChar();
431 if ( c == '*' && *current_ == '/' )
432 break;
433 }
434 return getNextChar() == '/';
435}
436
437
438bool
439Reader::readCppStyleComment()
440{
441 while ( current_ != end_ )
442 {
443 Char c = getNextChar();
444 if ( c == '\r' || c == '\n' )
445 break;
446 }
447 return true;
448}
449
450
451void
452Reader::readNumber()
453{
454 while ( current_ != end_ )
455 {
456 if ( !(*current_ >= '0' && *current_ <= '9') &&
457 !in( *current_, '.', 'e', 'E', '+', '-' ) )
458 break;
459 ++current_;
460 }
461}
462
463bool
464Reader::readString()
465{
466 Char c = 0;
467 while ( current_ != end_ )
468 {
469 c = getNextChar();
470 if ( c == '\\' )
471 getNextChar();
472 else if ( c == '"' )
473 break;
474 }
475 return c == '"';
476}
477
478
479bool
480Reader::readObject( Token &tokenStart )
481{
482 Token tokenName;
483 std::string name;
484 currentValue() = Value( objectValue );
485 while ( readToken( tokenName ) )
486 {
487 bool initialTokenOk = true;
488 while ( tokenName.type_ == tokenComment && initialTokenOk )
489 initialTokenOk = readToken( tokenName );
490 if ( !initialTokenOk )
491 break;
492 if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object
493 return true;
494 if ( tokenName.type_ != tokenString )
495 break;
496
497 name = "";
498 if ( !decodeString( tokenName, name ) )
499 return recoverFromError( tokenObjectEnd );
500
501 Token colon;
502 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
503 {
504 return addErrorAndRecover( "Missing ':' after object member name",
505 colon,
506 tokenObjectEnd );
507 }
508 Value &value = currentValue()[ name ];
509 nodes_.push( &value );
510 bool ok = readValue();
511 nodes_.pop();
512 if ( !ok ) // error already set
513 return recoverFromError( tokenObjectEnd );
514
515 Token comma;
516 if ( !readToken( comma )
517 || ( comma.type_ != tokenObjectEnd &&
518 comma.type_ != tokenArraySeparator &&
519 comma.type_ != tokenComment ) )
520 {
521 return addErrorAndRecover( "Missing ',' or '}' in object declaration",
522 comma,
523 tokenObjectEnd );
524 }
525 bool finalizeTokenOk = true;
526 while ( comma.type_ == tokenComment &&
527 finalizeTokenOk )
528 finalizeTokenOk = readToken( comma );
529 if ( comma.type_ == tokenObjectEnd )
530 return true;
531 }
532 return addErrorAndRecover( "Missing '}' or object member name",
533 tokenName,
534 tokenObjectEnd );
535}
536
537
538bool
539Reader::readArray( Token &tokenStart )
540{
541 currentValue() = Value( arrayValue );
542 skipSpaces();
543 if ( *current_ == ']' ) // empty array
544 {
545 Token endArray;
546 readToken( endArray );
547 return true;
548 }
549 int index = 0;
550 while ( true )
551 {
552 Value &value = currentValue()[ index++ ];
553 nodes_.push( &value );
554 bool ok = readValue();
555 nodes_.pop();
556 if ( !ok ) // error already set
557 return recoverFromError( tokenArrayEnd );
558
559 Token token;
560 // Accept Comment after last item in the array.
561 ok = readToken( token );
562 while ( token.type_ == tokenComment && ok )
563 {
564 ok = readToken( token );
565 }
566 bool badTokenType = ( token.type_ == tokenArraySeparator &&
567 token.type_ == tokenArrayEnd );
568 if ( !ok || badTokenType )
569 {
570 return addErrorAndRecover( "Missing ',' or ']' in array declaration",
571 token,
572 tokenArrayEnd );
573 }
574 if ( token.type_ == tokenArrayEnd )
575 break;
576 }
577 return true;
578}
579
580
581bool
582Reader::decodeNumber( Token &token )
583{
584 bool isDouble = false;
585 for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
586 {
587 isDouble = isDouble
588 || in( *inspect, '.', 'e', 'E', '+' )
589 || ( *inspect == '-' && inspect != token.start_ );
590 }
591 if ( isDouble )
592 return decodeDouble( token );
593 Location current = token.start_;
594 bool isNegative = *current == '-';
595 if ( isNegative )
596 ++current;
597 Value::UInt threshold = (isNegative ? Value::UInt(-Value::minInt)
598 : Value::maxUInt) / 10;
599 Value::UInt value = 0;
600 while ( current < token.end_ )
601 {
602 Char c = *current++;
603 if ( c < '0' || c > '9' )
604 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
605 if ( value >= threshold )
606 return decodeDouble( token );
607 value = value * 10 + Value::UInt(c - '0');
608 }
609 if ( isNegative )
610 currentValue() = -Value::Int( value );
611 else if ( value <= Value::UInt(Value::maxInt) )
612 currentValue() = Value::Int( value );
613 else
614 currentValue() = value;
615 return true;
616}
617
618
619bool
620Reader::decodeDouble( Token &token )
621{
622 double value = 0;
623 const int bufferSize = 32;
624 int count;
625 int length = int(token.end_ - token.start_);
626 if ( length <= bufferSize )
627 {
628 Char buffer[bufferSize];
629 memcpy( buffer, token.start_, length );
630 buffer[length] = 0;
631 count = sscanf( buffer, "%lf", &value );
632 }
633 else
634 {
635 std::string buffer( token.start_, token.end_ );
636 count = sscanf( buffer.c_str(), "%lf", &value );
637 }
638
639 if ( count != 1 )
640 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
641 currentValue() = value;
642 return true;
643}
644
645
646bool
647Reader::decodeString( Token &token )
648{
649 std::string decoded;
650 if ( !decodeString( token, decoded ) )
651 return false;
652 currentValue() = decoded;
653 return true;
654}
655
656
657bool
658Reader::decodeString( Token &token, std::string &decoded )
659{
660 decoded.reserve( token.end_ - token.start_ - 2 );
661 Location current = token.start_ + 1; // skip '"'
662 Location end = token.end_ - 1; // do not include '"'
663 while ( current != end )
664 {
665 Char c = *current++;
666 if ( c == '"' )
667 break;
668 else if ( c == '\\' )
669 {
670 if ( current == end )
671 return addError( "Empty escape sequence in string", token, current );
672 Char escape = *current++;
673 switch ( escape )
674 {
675 case '"': decoded += '"'; break;
676 case '/': decoded += '/'; break;
677 case '\\': decoded += '\\'; break;
678 case 'b': decoded += '\b'; break;
679 case 'f': decoded += '\f'; break;
680 case 'n': decoded += '\n'; break;
681 case 'r': decoded += '\r'; break;
682 case 't': decoded += '\t'; break;
683 case 'u':
684 {
685 unsigned int unicode;
686 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
687 return false;
688 decoded += codePointToUTF8(unicode);
689 }
690 break;
691 default:
692 return addError( "Bad escape sequence in string", token, current );
693 }
694 }
695 else
696 {
697 decoded += c;
698 }
699 }
700 return true;
701}
702
703bool
704Reader::decodeUnicodeCodePoint( Token &token,
705 Location &current,
706 Location end,
707 unsigned int &unicode )
708{
709
710 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
711 return false;
712 if (unicode >= 0xD800 && unicode <= 0xDBFF)
713 {
714 // surrogate pairs
715 if (end - current < 6)
716 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
717 unsigned int surrogatePair;
718 if (*(current++) == '\\' && *(current++)== 'u')
719 {
720 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
721 {
722 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
723 }
724 else
725 return false;
726 }
727 else
728 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
729 }
730 return true;
731}
732
733bool
734Reader::decodeUnicodeEscapeSequence( Token &token,
735 Location &current,
736 Location end,
737 unsigned int &unicode )
738{
739 if ( end - current < 4 )
740 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
741 unicode = 0;
742 for ( int index =0; index < 4; ++index )
743 {
744 Char c = *current++;
745 unicode *= 16;
746 if ( c >= '0' && c <= '9' )
747 unicode += c - '0';
748 else if ( c >= 'a' && c <= 'f' )
749 unicode += c - 'a' + 10;
750 else if ( c >= 'A' && c <= 'F' )
751 unicode += c - 'A' + 10;
752 else
753 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
754 }
755 return true;
756}
757
758
759bool
760Reader::addError( const std::string &message,
761 Token &token,
762 Location extra )
763{
764 ErrorInfo info;
765 info.token_ = token;
766 info.message_ = message;
767 info.extra_ = extra;
768 errors_.push_back( info );
769 return false;
770}
771
772
773bool
774Reader::recoverFromError( TokenType skipUntilToken )
775{
776 int errorCount = int(errors_.size());
777 Token skip;
778 while ( true )
779 {
780 if ( !readToken(skip) )
781 errors_.resize( errorCount ); // discard errors caused by recovery
782 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
783 break;
784 }
785 errors_.resize( errorCount );
786 return false;
787}
788
789
790bool
791Reader::addErrorAndRecover( const std::string &message,
792 Token &token,
793 TokenType skipUntilToken )
794{
795 addError( message, token );
796 return recoverFromError( skipUntilToken );
797}
798
799
800Value &
801Reader::currentValue()
802{
803 return *(nodes_.top());
804}
805
806
808Reader::getNextChar()
809{
810 if ( current_ == end_ )
811 return 0;
812 return *current_++;
813}
814
815
816void
817Reader::getLocationLineAndColumn( Location location,
818 int &line,
819 int &column ) const
820{
821 Location current = begin_;
822 Location lastLineStart = current;
823 line = 0;
824 while ( current < location && current != end_ )
825 {
826 Char c = *current++;
827 if ( c == '\r' )
828 {
829 if ( *current == '\n' )
830 ++current;
831 lastLineStart = current;
832 ++line;
833 }
834 else if ( c == '\n' )
835 {
836 lastLineStart = current;
837 ++line;
838 }
839 }
840 // column & line start at 1
841 column = int(location - lastLineStart) + 1;
842 ++line;
843}
844
845
846std::string
847Reader::getLocationLineAndColumn( Location location ) const
848{
849 int line, column;
850 getLocationLineAndColumn( location, line, column );
851 char buffer[18+16+16+1];
852 sprintf( buffer, "Line %d, Column %d", line, column );
853 return buffer;
854}
855
856
857std::string
859{
860 std::string formattedMessage;
861 for ( Errors::const_iterator itError = errors_.begin();
862 itError != errors_.end();
863 ++itError )
864 {
865 const ErrorInfo &error = *itError;
866 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
867 formattedMessage += " " + error.message_ + "\n";
868 if ( error.extra_ )
869 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
870 }
871 return formattedMessage;
872}
873
874
875std::istream& operator>>( std::istream &sin, Value &root )
876{
877 Json::Reader reader;
878 bool ok = reader.parse(sin, root, true);
879 //JSON_ASSERT( ok );
880 if (!ok) throw std::runtime_error(reader.getFormatedErrorMessages());
881 return sin;
882}
883
884
885} // namespace Json
Configuration passed to reader and writer.
Definition: features.h:13
bool strictRoot_
true if root must be either an array or an object value. Default: false.
Definition: features.h:37
bool allowComments_
true if comments are allowed. Default: true.
Definition: features.h:34
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Definition: json_reader.cpp:27
Features()
Initialize the configuration like JsonConfig::allFeatures;.
Definition: json_reader.cpp:19
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
Definition: json_reader.cpp:34
Unserialize a JSON document into a Value.
Definition: reader.h:17
Reader()
Constructs a Reader allowing all features for parsing.
std::string getFormatedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
char Char
Definition: reader.h:19
const Char * Location
Definition: reader.h:20
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
Represents a JSON value.
Definition: value.h:112
Json::UInt UInt
Definition: value.h:122
bool isArray() const
void setComment(const char *comment, CommentPlacement placement)
Comments must be //... or /* ... *‍/.
static const Int minInt
Definition: value.h:127
bool isObject() const
static const Int maxInt
Definition: value.h:128
Json::Int Int
Definition: value.h:123
JSON (JavaScript Object Notation).
Definition: features.h:6
std::istream & operator>>(std::istream &, Value &)
Read from 'sin' into 'root'.
static bool containsNewLine(Reader::Location begin, Reader::Location end)
Definition: json_reader.cpp:60
CommentPlacement
Definition: value.h:36
@ commentAfterOnSameLine
a comment just after a value on the same line
Definition: value.h:38
@ commentBefore
a comment placed on the line before a value
Definition: value.h:37
@ commentAfter
a comment on the line after a value (only make sense for root value)
Definition: value.h:39
static bool in(Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4)
Definition: json_reader.cpp:47
@ arrayValue
array value (ordered list)
Definition: value.h:31
@ objectValue
object value (collection of name/value pairs).
Definition: value.h:32
static std::string codePointToUTF8(unsigned int cp)
Definition: json_reader.cpp:69

SourceForge Logo hosts this site. Send comments to:
Json-cpp Developers