cfl_tokenreader.cpp
Go to the documentation of this file.
1 /** @file cfl_tokenreader.cpp @brief Class TokenReader */
2 
3 /* FAU Discrete Event Systems Library (libfaudes)
4 
5 Copyright (C) 2006 Bernd Opitz
6 Copyright (C) 2006 Thomas Moor
7 Exclusive copyright is granted to Klaus Schmidt
8 
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
13 
14 This library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18 
19 You should have received a copy of the GNU Lesser General Public
20 License along with this library; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22 
23 
24 
25 
26 #include "cfl_tokenwriter.h"
27 #include "cfl_tokenreader.h"
28 
29 namespace faudes {
30 
31 // TokenReader(mode,instring)
32 TokenReader::TokenReader(Mode mode, const std::string& rInString)
33  : mMode(mode), mpStream(NULL), mFileName("")
34 {
35  switch(mode) {
36  case String:
37  // use mSStream
38  FD_DV("TokenReader::Tokenreader(String, ...): " << rInString);
39  mpSStream= new std::istringstream(rInString, std::istringstream::in | std::istringstream::binary);
41  Rewind();
42  break;
43  case File:
44  // set up mFStream
45  FD_DV("TokenReader::Tokenreader(File, \"" << rInString <<"\")");
46  mFStream.exceptions(std::ios::badbit|std::ios::failbit);
47  try{
48  mFStream.open(rInString.c_str(), std::ios::in | std::ios::binary);
49  }
50  catch (std::ios::failure&) {
51  std::stringstream errstr;
52  errstr << "Exception opening/reading file \""<< rInString << "\"";
53  throw Exception("TokenReader::TokenReader", errstr.str(), 1);
54  }
55  mFileName=rInString;
57  Rewind();
58  break;
59  default:
60  std::stringstream errstr;
61  errstr << "Invalid Mode / Not implemented";
62  throw Exception("TokenReader::TokenReader(mode,instring)", errstr.str(), 1);
63  }
64 }
65 
66 
67 // TokenReader(rFilename)
68 TokenReader::TokenReader(const std::string& rFilename)
69  : mMode(File), mpStream(NULL), mFileName(rFilename)
70 {
71  // set up mFStream
72  FD_DV("TokenReader::Tokenreader(File, \"" << rFilename <<"\")");
73  mFStream.exceptions(std::ios::badbit|std::ios::failbit);
74  try{
75  mFStream.open(rFilename.c_str(), std::ios::in | std::ios::binary);
76  }
77  catch (std::ios::failure&) {
78  std::stringstream errstr;
79  errstr << "Exception opening/reading file \""<< rFilename << "\"";
80  throw Exception("TokenReader::TokenReader", errstr.str(), 1);
81  }
82  mFileName=rFilename;
84  Rewind();
85 }
86 
87 
88 // destruct
90  if(mMode==String) delete mpSStream;
91 }
92 
93 // Stream()
94 std::istream* TokenReader::Streamp(void) {
95  return mpStream;
96 }
97 
98 // Rewind()
99 void TokenReader::Rewind(void) {
100  FD_DV("TokenReader::Rewind: \"" << mFileName <<"\"");
101  try {
102  mpStream->clear();
103  mpStream->seekg(0);
105  mLevel=0;
106  mLineCount=1;
107  mFilePos=0;
108  mFaudesComments=true;
109  mLevelState.resize(mLevel+1);
110  mLevelState.back().mLabel="OUTER";
111  mLevelState.back().mStartPosition=mFilePos;
112  mLevelState.back().mStartLine=mLineCount;
113  mLevelState.back().mStartPeek=mPeekToken;
114  mLevelState.back().mFaudesComments=mFaudesComments;
115  }
116  catch (std::ios::failure&) {
117  std::stringstream errstr;
118  errstr << "Exception opening/reading file in "<< FileLine();
119  throw Exception("TokenReader::Rewind", errstr.str(), 1);
120  }
121 }
122 
123 
124 // FileName()
125 std::string TokenReader::FileName(void) const {
126  return mFileName;
127 }
128 
129 // Peek(token)
130 bool TokenReader::Peek(Token& token) {
131  // read to peek buffer
132  if(mPeekToken.IsNone()) {
133  try{
135  } catch (std::ios::failure&) {
136  std::stringstream errstr;
137  errstr << "Exception opening/reading file in "<< FileLine();
138  throw Exception("TokenReader::Peek", errstr.str(), 1);
139  }
140  }
141  // get from peek buffer
142  token=mPeekToken;
143  // substitute empty sections
144  if(token.IsEmpty()) token.ClrEnd();
145  // done
146  FD_DV("TokenReader: Peek: " << token.Str());
147  return !token.IsNone();
148 }
149 
150 // Get(token)
151 bool TokenReader::Get(Token& token) {
152  bool res;
153  // get token from peek buffer
154  res=Peek(token);
155  // invalidate buffer: case a
157  // invalidate buffer: case b
158  if(mPeekToken.IsEmpty()) {
159  FD_DV("TokenReader: fake end : " << mPeekToken.Str());
160  mPeekToken.SetEnd(std::string(mPeekToken.StringValue()));
161  }
162  // bail out on error
163  if(!res) return false;
164  // ignore misbehavong <br> tag in by level management
165  if(token.IsBegin("br") || token.IsEnd("br")) return true;
166  // track state (level of nested sections, filepos etc)
167  mFilePos=mpStream->tellg();
168  if(token.IsBegin()) {
169  // track level
170  mLevel++;
171  // update state
172  if(token.StringValue()=="ReferencePage") mFaudesComments=false;
173  if(token.StringValue()=="html") mFaudesComments=false;
174  if(token.StringValue()=="Html") mFaudesComments=false;
175  if(token.StringValue()=="HTML") mFaudesComments=false;
176  // record state
177  mLevelState.resize(mLevel+1);
178  mLevelState.back().mLabel=token.StringValue();
179  mLevelState.back().mStartPosition=mFilePos;
180  mLevelState.back().mStartLine=mLineCount;
181  mLevelState.back().mStartPeek=mPeekToken;
182  mLevelState.back().mFaudesComments=mFaudesComments;
183  }
184  if(token.IsEnd()) {
185 #ifdef FAUDES_CHECKED
186  if(token.StringValue()!=mLevelState.back().mLabel)
187  FD_WARN("TokenReader::Get(): end of section \"" << token.StringValue() << "\" at " << FileLine()
188  << " should match \"" << mLevelState.back().mLabel << "\" at line " << mLevelState.back().mStartLine );
189 #endif
190  if(mLevel<1) {
191 #ifdef FAUDES_CHECKED
192  FD_WARN("TokenReader::Get(): Unbalanced end of section \"" << token.StringValue() << "\" at " << FileLine());
193 #endif
194  token.SetNone();
195  return false;
196  }
197  mLevel--;
198  mLevelState.pop_back();
199  mFaudesComments=mLevelState.back().mFaudesComments;
200  }
201  FD_DV("TokenReader:Get(): " << token.Str());
202 
203  return res;
204 }
205 
206 // SeekBegin(label)
207 void TokenReader::SeekBegin(const std::string& rLabel) {
208  Token token;
209  SeekBegin(rLabel,token);
210 }
211 
212 // SeekBegin(label)
213 void TokenReader::SeekBegin(const std::string& rLabel, Token& rToken) {
214  // search for begin at any descending level, no rewind
215  FD_DV("TokenReader::SeekBegin: " << rLabel << " at " << FileLine() << " level " << mLevel);
216  int level=mLevel;
217  for (;;) {
218  // swollow some plain text (e.g. html may contain plain text that cannot be tokenized properly)
219  std::string swallow;
220  ReadCharacterData(swallow);
221  // exception: did not get a token at all (incl. eof)
222  if(!Peek(rToken)) {
223  Rewind();
224  std::stringstream errstr;
225  errstr << "Section \"" << rLabel << "\" expected at " << FileLine() << " no more tokens";
226  throw Exception("TokenReader::SeekBegin", errstr.str(), 51);
227  }
228  // exception: current section ends
229  if((rToken.Type() == Token::End) && (mLevel == level)) {
230  mpStream->seekg(mLevelState[level].mStartPosition);
231  mFilePos=mLevelState[level].mStartPosition;
232  mLineCount=mLevelState[level].mStartLine;
233  mPeekToken=mLevelState[level].mStartPeek;
234  mFaudesComments=mLevelState[level].mFaudesComments;
235  std::stringstream errstr;
236  errstr << "Section \"" << rLabel << "\" expected at " << FileLine()
237  << "current section ended unexpected. Found: " << rToken.StringValue() << " Type " << rToken.Type();
238  throw Exception("TokenReader::SeekBegin", errstr.str(), 51);
239  }
240  // success: found begin section
241  if ((rToken.IsBegin()) && (rToken.StringValue() == rLabel))
242  break;
243  // go on seeking
244  Get(rToken);
245  }
246 }
247 
248 // ReadBegin(label)
249 void TokenReader::ReadBegin(const std::string& rLabel) {
250  Token token;
251  ReadBegin(rLabel,token);
252 }
253 
254 // ReadBegin(label,token)
255 void TokenReader::ReadBegin(const std::string& rLabel, Token& rToken) {
256  FD_DV("Looking for Section \"" << rLabel << "\"");
257  try {
258  int level=mLevel;
259  int repcnt=0;
260  long int startpos=mFilePos;
261  FD_DV("section level " << level << " current pos " << startpos << " begin of section " << mLevelState[level].mStartPosition);
262  // search for begin at current level
263  for (;;) {
264  // swallow some plain text (e.g. html may contain plain text that cannot be tokenized properly)
265  std::string swallow;
266  ReadCharacterData(swallow);
267  // exception: did not get a token at all (incl eof)
268  if(!Peek(rToken)) {
269  std::stringstream errstr;
270  errstr << "Section \"" << rLabel << "\" expected at " << FileLine() << ", no token at all";
271  throw Exception("TokenReader::ReadBegin Peek", errstr.str(), 51);
272  }
273  // success: found begin section
274  if((rToken.IsBegin()) && (rToken.StringValue() == rLabel) && (mLevel==level)) {
275  Get(rToken);
276  break;
277  }
278  // exception: did not find begin label
279  if((mFilePos>=startpos) && (repcnt==1)) {
280  std::stringstream errstr;
281  errstr << "Section \"" << rLabel << "\" expected at " << FileLine() << ", did not find begin label";
282  throw Exception("TokenReader::ReadBegin: Missing", errstr.str(), 51);
283  }
284  // exception: did not find begin label
285  if(repcnt>1) {
286  std::stringstream errstr;
287  errstr << "Section \"" << rLabel << "\" expected at " << FileLine() << ", did not find begin label";
288  throw Exception("TokenReader::ReadBegin: Missing", errstr.str(), 51);
289  }
290  // rewind once when current section ends
291  if(rToken.IsEnd() && !rToken.IsBegin() && (mLevel == level)) {
292  mpStream->seekg(mLevelState[level].mStartPosition);
293  mFilePos=mLevelState[level].mStartPosition;
294  mLineCount=mLevelState[level].mStartLine;
295  mPeekToken=mLevelState[level].mStartPeek;
296  mFaudesComments=mLevelState[level].mFaudesComments;
297  repcnt++;
298  continue;
299  }
300  // skip this token
301  Get(rToken);
302  }
303  }
304  // catch my seek/tell errors
305  catch (std::ios::failure&) {
306  std::stringstream errstr;
307  errstr << "Section \"" << rLabel << "\" expected at " << FileLine();
308  throw Exception("TokenReader::ReadBegin Rewind", errstr.str(), 1);
309  }
310 }
311 
312 
313 // ExistsBegin(label)
314 bool TokenReader::ExistsBegin(const std::string& rLabel) {
315  FD_DV("TokenReader::ExistsBegin(): looking for Section \"" << rLabel << "\"");
316  try {
317  int level=mLevel;
318  int rwcnt=0;
319  long int startpos=mFilePos;
320  FD_DV("section level " << level << " current pos " << startpos << " begin of section " << mLevelState[level].mStartPosition);
321  Token token;
322  // search for begin at current level
323  for(;;) {
324  // swallow some plain text (e.g. html may contain plain text that cannot be tokenized properly)
325  std::string swallow;
326  ReadCharacterData(swallow);
327  // fail: did not get a token at all (e.g. eof)
328  if(!Peek(token)) {
329  return false;
330  }
331  // success: found begin section
332  if((token.IsBegin()) && (token.StringValue() == rLabel) && (mLevel==level)) {
333  return true;
334  }
335  // rewind once when current section ends
336  if(token.IsEnd() && (mLevel == level) && (rwcnt==0)) {
337  mpStream->seekg(mLevelState[level].mStartPosition);
338  mFilePos=mLevelState[level].mStartPosition;
339  mLineCount=mLevelState[level].mStartLine;
340  mPeekToken=mLevelState[level].mStartPeek;
341  mFaudesComments=mLevelState[level].mFaudesComments;
342  rwcnt++;;
343  if(rwcnt>1) return false; // survive funny mFilePos in e.g. empty sections
344  continue;
345  }
346  // fail: did not find begin label are one turn around
347  if((mFilePos>=startpos) && (rwcnt>0) && (mLevel == level)) {
348  return false;
349  }
350  // skip this token
351  Get(token);
352  }
353  }
354  // catch my seek/tell errors
355  catch (std::ios::failure&) {
356  std::stringstream errstr;
357  errstr << "IO Error while scanning Section \"" << rLabel << "\" at " << FileLine();
358  throw Exception("TokenReader::ExistsBegin IO", errstr.str(), 1);
359  }
360  return false;
361 }
362 
363 // ReadEnd(label)
364 void TokenReader::ReadEnd(const std::string& rLabel) {
365  FD_DV("TokenReader::ReadEnd: " << rLabel << " at " << FileLine() );
366  // search for end at current level
367  int level=mLevel;
368  Token token;
369  for (;;) {
370  // swallow some plain text (e.g. html may contain plain text that cannot be tokenized properly)
371  std::string swallow;
372  ReadCharacterData(swallow);
373  // exception: did not get a token at all
374  if(!Peek(token)) {
375  std::stringstream errstr;
376  errstr << "end of section \"" << rLabel << "\" expected at " << FileLine();
377  throw Exception("TokenReader::ReadEnd", errstr.str(), 51);
378  }
379  // success: found end of current section
380  if(token.IsEnd() && !token.IsBegin() && (token.StringValue() == rLabel) && (mLevel==level)) {
381  Get(token);
382  break;
383  }
384  // exception: current section ends with unexpected label
385  if(mLevel<level) {
386  std::stringstream errstr;
387  errstr << "end of Section \"" << rLabel << "\" expected at " << FileLine();
388  throw Exception("TokenReader::ReadEnd", errstr.str(), 51);
389  }
390  // get the token and continue
391  Get(token);
392  //std::cout << token.Str() << "\n";
393  }
394 }
395 
396 // Recover()
397 bool TokenReader::Recover(int level) {
398  // paranoia
399  if(level<0) return false;
400  // trivial cases
401  if(level>mLevel) return false;
402  if(level==mLevel) return true;
403  // loop until match
404  Token token;
405  while(Get(token))
406  if(mLevel<=level) break;
407  // done
408  return level==mLevel;
409 }
410 
411 // Recover()
412 bool TokenReader::Reset(int level) {
413  // paranoia
414  if(level<0) return false;
415  if(level>mLevel) return false;
416  // trivial case
417  if(level==0) {
418  Rewind();
419  return true;
420  }
421  // loop until end
422  Token token;
423  while(Get(token)) {
424  if((mLevel==level) && token.IsEnd()) break;
425  if(mLevel<level) return false;
426  }
427  // do the rewind
428  mpStream->seekg(mLevelState[level].mStartPosition);
429  mFilePos=mLevelState[level].mStartPosition;
430  mLineCount=mLevelState[level].mStartLine;
431  mPeekToken=mLevelState[level].mStartPeek;
432  mFaudesComments=mLevelState[level].mFaudesComments;
433  return true;
434 }
435 
436 // Eos(label)
437 bool TokenReader::Eos(const std::string& rLabel) {
438  // peek token and check for end of section
439  Token token;
440  Peek(token);
441  if(! (token.IsEnd() && !token.IsBegin()))
442  return false;
443  if((token.IsEnd() && !token.IsBegin()) && (token.StringValue() == rLabel))
444  return true;
445  std::stringstream errstr;
446  errstr << "section end \"" << rLabel << "\" expected at " << FileLine();
447  throw Exception("TokenReader::Eos", errstr.str(), 51);
448  return false;
449 }
450 
451 
452 // ReadInteger()
453 long int TokenReader::ReadInteger(void) {
454  Token token;
455  Get(token);
456  if(!token.IsInteger()) {
457  std::stringstream errstr;
458  errstr << "Integer expected at " << FileLine();
459  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
460  }
461  return token.IntegerValue();
462 }
463 
464 // ReadFloat()
466  Token token;
467  Get(token);
468  if((!token.IsFloat()) && (!token.IsInteger())) {
469  std::stringstream errstr;
470  errstr << "Float expected at " << FileLine();
471  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
472  }
473  return token.FloatValue();
474 }
475 
476 // ReadString()
477 std::string TokenReader::ReadString(void) {
478  Token token;
479  Get(token);
480  if(!token.IsString()) {
481  std::stringstream errstr;
482  errstr << "String expected at " << FileLine();
483  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
484  }
485  return token.StringValue();
486 }
487 
488 
489 // ReadOption()
490 std::string TokenReader::ReadOption(void) {
491  Token token;
492  Get(token);
493  if(!token.IsOption()) {
494  std::stringstream errstr;
495  errstr << "Option expected at " << FileLine();
496  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
497  }
498  return token.OptionValue();
499 }
500 
501 // ReadBinary()
502 void TokenReader::ReadBinary(std::string& rData) {
503  Token token;
504  Get(token);
505  if(!token.IsBinary()) {
506  std::stringstream errstr;
507  errstr << "Binary string expected at " << FileLine();
508  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
509  }
510  rData = token.StringValue();
511 }
512 
513 
514 // ReadText()
515 void TokenReader::ReadText(const std::string& rLabel, std::string& rText) {
516  // insist in my begin tag
517  Token token;
518  Peek(token);
519  if(!token.IsBegin(rLabel)) {
520  std::stringstream errstr;
521  errstr << "Text element \""<< rLabel << "\" expected at " << FileLine();
522  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
523  }
524  Get(token);
525  // do my text reading
526  int ll=Token::ReadEscapedString(mpStream,'<',rText);
527  if(ll<0) {
528  std::stringstream errstr;
529  errstr << "Text expected at " << FileLine();
530  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
531  }
532  mLineCount+=ll;
533  // strip leading/trailing linefeeds
534  static const std::string line="\n\r\v";
535  std::size_t pos1=rText.find_first_not_of(line);
536  if(pos1!=std::string::npos)
537  rText=rText.substr(pos1);
538  else
539  rText.clear();
540  std::size_t pos2=rText.find_last_not_of(line);
541  if(pos2!=std::string::npos)
542  rText.erase(pos2+1);
543  // strip leading/trailing white if all in one line
544  static const std::string white=" \t";
545  if(pos1==0) {
546  pos1=rText.find_first_not_of(white);
547  if(pos1!=std::string::npos)
548  rText=rText.substr(pos1);
549  else
550  rText.clear();
551  std::size_t pos2=rText.find_last_not_of(white);
552  if(pos2!=std::string::npos)
553  rText.erase(pos2+1);
554  }
555  // insist in my end tag
556  Peek(token);
557  if(!token.IsEnd(rLabel)) {
558  std::stringstream errstr;
559  errstr << "End of text element \""<< rLabel << "\" expected at " << FileLine();
560  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
561  }
562  Get(token);
563 }
564 
565 // ReadVerbatim()
566 void TokenReader::ReadVerbatim(const std::string& rLabel, std::string& rString) {
567  // insist in my tag
568  Token token;
569  Peek(token);
570  if(!token.IsBegin(rLabel)) {
571  std::stringstream errstr;
572  errstr << "Verbatim element \""<< rLabel << "\" expected at " << FileLine();
573  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
574  }
575  Get(token);
576  rString.clear();
577  // loop cdata
578  int cnt=0;
579  rString.clear();
580  while(Peek(token)) {
581  if(!token.IsString()) break;
582  if(cnt>0 && !token.IsCdata()) break;
583  Get(token);
584  rString.append(token.StringValue());
585  cnt++;
586  }
587  // strip leading/trailing linefeeds
588  static const std::string line="\n\r\v";
589  std::size_t pos1=rString.find_first_not_of(line);
590  if(pos1!=std::string::npos)
591  rString=rString.substr(pos1);
592  else
593  rString.clear();
594  std::size_t pos2=rString.find_last_not_of(line);
595  if(pos2!=std::string::npos)
596  rString.erase(pos2+1);
597  // insist in my end tag
598  Peek(token);
599  if(!token.IsEnd(rLabel)) {
600  std::stringstream errstr;
601  errstr << "End of verbatim element \""<< rLabel << "\" expected at " << FileLine();
602  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
603  }
604  Get(token);
605 }
606 
607 // ReadCharacterData()
608 void TokenReader::ReadCharacterData(std::string& rData) {
609  // if we have a markup token in the buffer there is no character data except white space
610  if(mPeekToken.IsBegin() || mPeekToken.IsEnd()) {
611  FD_DV("TokenReader::ReadCharacterData(): tag in buffer");
612  rData=mPeekToken.PreceedingSpace();
614  return;
615  }
616  // do my own reading
618  if(ll<0) {
619  std::stringstream errstr;
620  errstr << "Missformed character data at " << FileLine() << ": " << rData;
621  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
622  }
623  mLineCount+=ll;
624  // prepend peek buffers string value (better: need rewind!)
625  if(mPeekToken.IsString())
626  rData=mPeekToken.StringValue() + " " + rData;
627  // invalidate buffer
629 }
630 
631 // ReadSection()
632 void TokenReader::ReadSection(std::string& rSectionString) {
633  // record current level
634  int clevel = Level();
635  // setup token writer for destination // need a better interface here: provide string buffer
637  tw.Endl(true);
638  // token copy loop
639  while(true) {
640  // see whether we can grab and copy some character data
641  std::string cdata;
642  ReadCharacterData(cdata);
643  tw.WriteCharacterData(cdata);
644  // break end of my level
645  Token token;
646  if(!Peek(token)) break;
647  if(token.IsEnd() && !token.IsBegin() && Level()==clevel)
648  break;
649  // get and copy markup token
650  Get(token);
651  token.PreceedingSpace("n"); // explicit no formating
652  tw.Write(token);
653  }
654  // done
655  rSectionString=tw.Str();
656 }
657 
658 
659 // Line()
660 int TokenReader::Line(void) const {
661  return mLineCount;
662 }
663 
664 // FileLine()
665 std::string TokenReader::FileLine(void) const {
666  if(mFileName!="")
667  return "("+ mFileName + ":" + ToStringInteger(mLineCount) +")";
668  else
669  return "(#" + ToStringInteger(mLineCount) +")";
670 }
671 
672 } // namespace faudes

libFAUDES 2.28c --- 2016.09.30 --- C++ API documentation by doxygen