cfl_tokenreader.cpp
Go to the documentation of this file.
1 /** @file cfl_tokenreader.cpp @brief Class TokenReader */
2 
3 /* FAU Discrete Event Systems Library (libfaudes)
4 
5 Copyright (C) 2006 Bernd Opitz
6 Copyright (C) 2006 Thomas Moor
7 Exclusive copyright is granted to Klaus Schmidt
8 
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
13 
14 This library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18 
19 You should have received a copy of the GNU Lesser General Public
20 License along with this library; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22 
23 
24 
25 
26 #include "cfl_tokenwriter.h"
27 #include "cfl_tokenreader.h"
28 
29 namespace faudes {
30 
31 // TokenReader(mode,instring)
32 TokenReader::TokenReader(Mode mode, const std::string& rInString)
33  : mMode(mode), mpStream(NULL), mFileName("")
34 {
35  switch(mode) {
36  case String:
37  // use mSStream
38  FD_DV("TokenReader::Tokenreader(String, ...): " << rInString);
39  mpSStream= new std::istringstream(rInString, std::istringstream::in | std::istringstream::binary);
41  Rewind();
42  break;
43  case File:
44  // set up mFStream
45  FD_DV("TokenReader::Tokenreader(File, \"" << rInString <<"\")");
46  mFStream.exceptions(std::ios::badbit|std::ios::failbit);
47  try{
48  mFStream.open(rInString.c_str(), std::ios::in | std::ios::binary);
49  }
50  catch (std::ios::failure&) {
51  std::stringstream errstr;
52  errstr << "Exception opening/reading file \""<< rInString << "\"";
53  throw Exception("TokenReader::TokenReader", errstr.str(), 1);
54  }
55  mFileName=rInString;
57  Rewind();
58  break;
59  default:
60  std::stringstream errstr;
61  errstr << "Invalid Mode / Not implemented";
62  throw Exception("TokenReader::TokenReader(mode,instring)", errstr.str(), 1);
63  }
64 }
65 
66 
67 // TokenReader(rFilename)
68 TokenReader::TokenReader(const std::string& rFilename)
69  : mMode(File), mpStream(NULL), mFileName(rFilename)
70 {
71  // set up mFStream
72  FD_DV("TokenReader::Tokenreader(File, \"" << rFilename <<"\")");
73  mFStream.exceptions(std::ios::badbit|std::ios::failbit);
74  try{
75  mFStream.open(rFilename.c_str(), std::ios::in | std::ios::binary);
76  }
77  catch (std::ios::failure&) {
78  std::stringstream errstr;
79  errstr << "Exception opening/reading file \""<< rFilename << "\"";
80  throw Exception("TokenReader::TokenReader", errstr.str(), 1);
81  }
82  mFileName=rFilename;
84  Rewind();
85 }
86 
87 
88 // destruct
90  if(mMode==String) delete mpSStream;
91 }
92 
93 // Stream()
94 std::istream* TokenReader::Streamp(void) {
95  return mpStream;
96 }
97 
98 // Rewind()
99 void TokenReader::Rewind(void) {
100  FD_DV("TokenReader::Rewind: \"" << mFileName <<"\"");
101  try {
102  mpStream->clear();
103  mpStream->seekg(0);
105  mLevel=0;
106  mLineCount=1;
107  mFilePos=0;
108  mFaudesComments=true;
109  mLevelState.resize(mLevel+1);
110  mLevelState.back().mLabel="OUTER";
111  mLevelState.back().mStartPosition=mFilePos;
112  mLevelState.back().mStartLine=mLineCount;
113  mLevelState.back().mStartPeek=mPeekToken;
114  mLevelState.back().mFaudesComments=mFaudesComments;
115  }
116  catch (std::ios::failure&) {
117  std::stringstream errstr;
118  errstr << "Exception opening/reading file in "<< FileLine();
119  throw Exception("TokenReader::Rewind", errstr.str(), 1);
120  }
121 }
122 
123 
124 // FileName()
125 std::string TokenReader::FileName(void) const {
126  return mFileName;
127 }
128 
129 // Peek(token)
130 bool TokenReader::Peek(Token& token) {
131  // read to peek buffer
132  if(mPeekToken.IsNone()) {
133  try{
135  } catch (std::ios::failure&) {
136  std::stringstream errstr;
137  errstr << "Exception opening/reading file in "<< FileLine();
138  throw Exception("TokenReader::Peek", errstr.str(), 1);
139  }
140  }
141  // get from peek buffer
142  token=mPeekToken;
143  // substitute empty sections
144  if(token.IsEmpty()) token.ClrEnd();
145  // done
146  FD_DV("TokenReader: Peek: " << token.Str());
147  return !token.IsNone();
148 }
149 
150 // Get(token)
151 bool TokenReader::Get(Token& token) {
152  bool res;
153  // get token from peek buffer
154  res=Peek(token);
155  // invalidate buffer: case a
157  // invalidate buffer: case b
158  if(mPeekToken.IsEmpty()) {
159  FD_DV("TokenReader: fake end : " << mPeekToken.Str());
160  mPeekToken.SetEnd(std::string(mPeekToken.StringValue()));
161  }
162  // bail out on error
163  if(!res) return false;
164  // ignore misbehavong <br> tag in by level management
165  if(token.IsBegin("br") || token.IsEnd("br")) return true;
166  // track state (level of nested sections, filepos etc)
167  mFilePos=mpStream->tellg();
168  if(token.IsBegin()) {
169  // track level
170  mLevel++;
171  // update state
172  if(token.StringValue()=="ReferencePage") mFaudesComments=false;
173  if(token.StringValue()=="html") mFaudesComments=false;
174  if(token.StringValue()=="Html") mFaudesComments=false;
175  if(token.StringValue()=="HTML") mFaudesComments=false;
176  // record state
177  mLevelState.resize(mLevel+1);
178  mLevelState.back().mLabel=token.StringValue();
179  mLevelState.back().mStartPosition=mFilePos;
180  mLevelState.back().mStartLine=mLineCount;
181  mLevelState.back().mStartPeek=mPeekToken;
182  mLevelState.back().mFaudesComments=mFaudesComments;
183  }
184  if(token.IsEnd()) {
185 #ifdef FAUDES_CHECKED
186  if(token.StringValue()!=mLevelState.back().mLabel)
187  FD_WARN("TokenReader::Get(): end of section \"" << token.StringValue() << "\" at " << FileLine()
188  << " should match \"" << mLevelState.back().mLabel << "\" at line " << mLevelState.back().mStartLine );
189 #endif
190  if(mLevel<1) {
191 #ifdef FAUDES_CHECKED
192  FD_WARN("TokenReader::Get(): Unbalanced end of section \"" << token.StringValue() << "\" at " << FileLine());
193 #endif
194  token.SetNone();
195  return false;
196  }
197  mLevel--;
198  mLevelState.pop_back();
199  mFaudesComments=mLevelState.back().mFaudesComments;
200  }
201  FD_DV("TokenReader:Get(): " << token.Str());
202 
203  return res;
204 }
205 
206 // SeekBegin(label)
207 void TokenReader::SeekBegin(const std::string& rLabel) {
208  Token token;
209  SeekBegin(rLabel,token);
210 }
211 
212 // SeekBegin(label)
213 void TokenReader::SeekBegin(const std::string& rLabel, Token& rToken) {
214  // search for begin at any descending level, no rewind
215  FD_DV("TokenReader::SeekBegin: " << rLabel << " at " << FileLine() << " level " << mLevel);
216  int level=mLevel;
217  for (;;) {
218  // swollow some plain text (e.g. html may contain plain text that cannot be tokenized properly)
219  std::string swallow;
220  ReadCharacterData(swallow);
221  // exception: did not get a token at all (incl. eof)
222  if(!Peek(rToken)) {
223  Rewind();
224  std::stringstream errstr;
225  errstr << "Section \"" << rLabel << "\" expected at " << FileLine() << " no more tokens";
226  throw Exception("TokenReader::SeekBegin", errstr.str(), 51);
227  }
228  // exception: current section ends
229  if((rToken.Type() == Token::End) && (mLevel == level)) {
230  mpStream->seekg(mLevelState[level].mStartPosition);
231  mFilePos=mLevelState[level].mStartPosition;
232  mLineCount=mLevelState[level].mStartLine;
233  mPeekToken=mLevelState[level].mStartPeek;
234  mFaudesComments=mLevelState[level].mFaudesComments;
235  std::stringstream errstr;
236  errstr << "Section \"" << rLabel << "\" expected at " << FileLine()
237  << "current section ended unexpected. Found: " << rToken.StringValue() << " Type " << rToken.Type();
238  throw Exception("TokenReader::SeekBegin", errstr.str(), 51);
239  }
240  // success: found begin section
241  if ((rToken.IsBegin()) && (rToken.StringValue() == rLabel))
242  break;
243  // go on seeking
244  Get(rToken);
245  }
246 }
247 
248 // ReadBegin(label)
249 void TokenReader::ReadBegin(const std::string& rLabel) {
250  Token token;
251  ReadBegin(rLabel,token);
252 }
253 
254 // ReadBegin(label,token)
255 void TokenReader::ReadBegin(const std::string& rLabel, Token& rToken) {
256  FD_DV("Looking for Section \"" << rLabel << "\"");
257  try {
258  int level=mLevel;
259  int repcnt=0;
260  long int startpos=mFilePos;
261  FD_DV("section level " << level << " current pos " << startpos << " begin of section " << mLevelState[level].mStartPosition);
262  // search for begin at current level
263  for (;;) {
264  // swallow some plain text (e.g. html may contain plain text that cannot be tokenized properly)
265  std::string swallow;
266  ReadCharacterData(swallow);
267  // exception: did not get a token at all (incl eof)
268  if(!Peek(rToken)) {
269  std::stringstream errstr;
270  errstr << "Section \"" << rLabel << "\" expected at " << FileLine() << ", no token at all";
271  throw Exception("TokenReader::ReadBegin Peek", errstr.str(), 51);
272  }
273  // success: found begin section
274  if((rToken.IsBegin()) && (rToken.StringValue() == rLabel) && (mLevel==level)) {
275  Get(rToken);
276  break;
277  }
278  // exception: did not find begin label
279  if((mFilePos>=startpos) && (repcnt==1)) {
280  std::stringstream errstr;
281  errstr << "Section \"" << rLabel << "\" expected at " << FileLine() << ", did not find begin label";
282  throw Exception("TokenReader::ReadBegin: Missing", errstr.str(), 51);
283  }
284  // exception: did not find begin label
285  if(repcnt>1) {
286  std::stringstream errstr;
287  errstr << "Section \"" << rLabel << "\" expected at " << FileLine() << ", did not find begin label";
288  throw Exception("TokenReader::ReadBegin: Missing", errstr.str(), 51);
289  }
290  // rewind once when current section ends
291  if(rToken.IsEnd() && !rToken.IsBegin() && (mLevel == level)) {
292  mpStream->seekg(mLevelState[level].mStartPosition);
293  mFilePos=mLevelState[level].mStartPosition;
294  mLineCount=mLevelState[level].mStartLine;
295  mPeekToken=mLevelState[level].mStartPeek;
296  mFaudesComments=mLevelState[level].mFaudesComments;
297  repcnt++;
298  continue;
299  }
300  // skip this token
301  Get(rToken);
302  }
303  }
304  // catch my seek/tell errors
305  catch (std::ios::failure&) {
306  std::stringstream errstr;
307  errstr << "Section \"" << rLabel << "\" expected at " << FileLine();
308  throw Exception("TokenReader::ReadBegin Rewind", errstr.str(), 1);
309  }
310 }
311 
312 
313 // ExistsBegin(label)
314 bool TokenReader::ExistsBegin(const std::string& rLabel) {
315  FD_DV("TokenReader::ExistsBegin(): looking for Section \"" << rLabel << "\"");
316  try {
317  int level=mLevel;
318  int rwcnt=0;
319  long int startpos=mFilePos;
320  FD_DV("section level " << level << " current pos " << startpos << " begin of section " << mLevelState[level].mStartPosition);
321  Token token;
322  // search for begin at current level
323  for(;;) {
324  // swallow some plain text (e.g. html may contain plain text that cannot be tokenized properly)
325  std::string swallow;
326  ReadCharacterData(swallow);
327  // fail: did not get a token at all (e.g. eof)
328  if(!Peek(token)) {
329  return false;
330  }
331  // success: found begin section
332  if((token.IsBegin()) && (token.StringValue() == rLabel) && (mLevel==level)) {
333  return true;
334  }
335  // rewind once when current section ends
336  if(token.IsEnd() && (mLevel == level) && (rwcnt==0)) {
337  mpStream->seekg(mLevelState[level].mStartPosition);
338  mFilePos=mLevelState[level].mStartPosition;
339  mLineCount=mLevelState[level].mStartLine;
340  mPeekToken=mLevelState[level].mStartPeek;
341  mFaudesComments=mLevelState[level].mFaudesComments;
342  rwcnt++;;
343  if(rwcnt>1) return false; // survive funny mFilePos in e.g. empty sections
344  continue;
345  }
346  // fail: did not find begin label are one turn around
347  if((mFilePos>=startpos) && (rwcnt>0) && (mLevel == level)) {
348  return false;
349  }
350  // skip this token
351  Get(token);
352  }
353  }
354  // catch my seek/tell errors
355  catch (std::ios::failure&) {
356  std::stringstream errstr;
357  errstr << "IO Error while scanning Section \"" << rLabel << "\" at " << FileLine();
358  throw Exception("TokenReader::ExistsBegin IO", errstr.str(), 1);
359  }
360  return false;
361 }
362 
363 // ReadEnd(label)
364 void TokenReader::ReadEnd(const std::string& rLabel) {
365  FD_DV("TokenReader::ReadEnd: " << rLabel << " at " << FileLine() );
366  // search for end at current level
367  int level=mLevel;
368  Token token;
369  for (;;) {
370  // swallow some plain text (e.g. html may contain plain text that cannot be tokenized properly)
371  std::string swallow;
372  ReadCharacterData(swallow);
373  // exception: did not get a token at all
374  if(!Peek(token)) {
375  std::stringstream errstr;
376  errstr << "end of section \"" << rLabel << "\" expected at " << FileLine();
377  throw Exception("TokenReader::ReadEnd", errstr.str(), 51);
378  }
379  // success: found end of current section
380  if(token.IsEnd() && !token.IsBegin() && (token.StringValue() == rLabel) && (mLevel==level)) {
381  Get(token);
382  break;
383  }
384  // exception: current section ends with unexpected label
385  if(mLevel<level) {
386  std::stringstream errstr;
387  errstr << "end of Section \"" << rLabel << "\" expected at " << FileLine();
388  throw Exception("TokenReader::ReadEnd", errstr.str(), 51);
389  }
390  // get the token and continue
391  Get(token);
392  //std::cout << token.Str() << "\n";
393  }
394 }
395 
396 // Recover()
397 bool TokenReader::Recover(int level) {
398  // paranoia
399  if(level<0) return false;
400  // trivial cases
401  if(level>mLevel) return false;
402  if(level==mLevel) return true;
403  // loop until match
404  Token token;
405  while(Get(token))
406  if(mLevel<=level) break;
407  // done
408  return level==mLevel;
409 }
410 
411 // Eos(label)
412 bool TokenReader::Eos(const std::string& rLabel) {
413  // peek token and check for end of section
414  Token token;
415  Peek(token);
416  if(! (token.IsEnd() && !token.IsBegin()))
417  return false;
418  if((token.IsEnd() && !token.IsBegin()) && (token.StringValue() == rLabel))
419  return true;
420  std::stringstream errstr;
421  errstr << "section end \"" << rLabel << "\" expected at " << FileLine();
422  throw Exception("TokenReader::Eos", errstr.str(), 51);
423  return false;
424 }
425 
426 
427 // ReadInteger()
428 long int TokenReader::ReadInteger(void) {
429  Token token;
430  Get(token);
431  if(!token.IsInteger()) {
432  std::stringstream errstr;
433  errstr << "Integer expected at " << FileLine();
434  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
435  }
436  return token.IntegerValue();
437 }
438 
439 // ReadFloat()
441  Token token;
442  Get(token);
443  if((!token.IsFloat()) && (!token.IsInteger())) {
444  std::stringstream errstr;
445  errstr << "Float expected at " << FileLine();
446  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
447  }
448  return token.FloatValue();
449 }
450 
451 // ReadString()
452 std::string TokenReader::ReadString(void) {
453  Token token;
454  Get(token);
455  if(!token.IsString()) {
456  std::stringstream errstr;
457  errstr << "String expected at " << FileLine();
458  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
459  }
460  return token.StringValue();
461 }
462 
463 
464 // ReadOption()
465 std::string TokenReader::ReadOption(void) {
466  Token token;
467  Get(token);
468  if(!token.IsOption()) {
469  std::stringstream errstr;
470  errstr << "Option expected at " << FileLine();
471  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
472  }
473  return token.OptionValue();
474 }
475 
476 // ReadBinary()
477 void TokenReader::ReadBinary(std::string& rData) {
478  Token token;
479  Get(token);
480  if(!token.IsBinary()) {
481  std::stringstream errstr;
482  errstr << "Binary string expected at " << FileLine();
483  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
484  }
485  rData = token.StringValue();
486 }
487 
488 
489 // ReadText()
490 void TokenReader::ReadText(const std::string& rLabel, std::string& rText) {
491  // insist in my begin tag
492  Token token;
493  Peek(token);
494  if(!token.IsBegin(rLabel)) {
495  std::stringstream errstr;
496  errstr << "Text element \""<< rLabel << "\" expected at " << FileLine();
497  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
498  }
499  Get(token);
500  // do my text reading
501  int ll=Token::ReadEscapedString(mpStream,'<',rText);
502  if(ll<0) {
503  std::stringstream errstr;
504  errstr << "Text expected at " << FileLine();
505  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
506  }
507  mLineCount+=ll;
508  // strip leading/trailing linefeeds
509  static const std::string line="\n\r\v";
510  std::size_t pos1=rText.find_first_not_of(line);
511  if(pos1!=std::string::npos)
512  rText=rText.substr(pos1);
513  else
514  rText.clear();
515  std::size_t pos2=rText.find_last_not_of(line);
516  if(pos2!=std::string::npos)
517  rText.erase(pos2+1);
518  // strip leading/trailing white if all in one line
519  static const std::string white=" \t";
520  if(pos1==0) {
521  pos1=rText.find_first_not_of(white);
522  if(pos1!=std::string::npos)
523  rText=rText.substr(pos1);
524  else
525  rText.clear();
526  std::size_t pos2=rText.find_last_not_of(white);
527  if(pos2!=std::string::npos)
528  rText.erase(pos2+1);
529  }
530  // insist in my end tag
531  Peek(token);
532  if(!token.IsEnd(rLabel)) {
533  std::stringstream errstr;
534  errstr << "End of text element \""<< rLabel << "\" expected at " << FileLine();
535  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
536  }
537  Get(token);
538 }
539 
540 // ReadVerbatim()
541 void TokenReader::ReadVerbatim(const std::string& rLabel, std::string& rString) {
542  // insist in my tag
543  Token token;
544  Peek(token);
545  if(!token.IsBegin(rLabel)) {
546  std::stringstream errstr;
547  errstr << "Verbatim element \""<< rLabel << "\" expected at " << FileLine();
548  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
549  }
550  Get(token);
551  rString.clear();
552  // loop cdata
553  int cnt=0;
554  rString.clear();
555  while(Peek(token)) {
556  if(!token.IsString()) break;
557  if(cnt>0 && !token.IsCdata()) break;
558  Get(token);
559  rString.append(token.StringValue());
560  cnt++;
561  }
562  // strip leading/trailing linefeeds
563  static const std::string line="\n\r\v";
564  std::size_t pos1=rString.find_first_not_of(line);
565  if(pos1!=std::string::npos)
566  rString=rString.substr(pos1);
567  else
568  rString.clear();
569  std::size_t pos2=rString.find_last_not_of(line);
570  if(pos2!=std::string::npos)
571  rString.erase(pos2+1);
572  // insist in my end tag
573  Peek(token);
574  if(!token.IsEnd(rLabel)) {
575  std::stringstream errstr;
576  errstr << "End of verbatim element \""<< rLabel << "\" expected at " << FileLine();
577  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
578  }
579  Get(token);
580 }
581 
582 // ReadCharacterData()
583 void TokenReader::ReadCharacterData(std::string& rData) {
584  // if we have a markup token in the buffer there is no character data except white space
585  if(mPeekToken.IsBegin() || mPeekToken.IsEnd()) {
586  FD_DV("TokenReader::ReadCharacterData(): tag in buffer");
587  rData=mPeekToken.PreceedingSpace();
589  return;
590  }
591  // do my own reading
593  if(ll<0) {
594  std::stringstream errstr;
595  errstr << "Missformed character data at " << FileLine() << ": " << rData;
596  throw Exception("TokenReader::TokenReader", errstr.str(), 50);
597  }
598  mLineCount+=ll;
599  // prepend peek buffers string value (better: need rewind!)
600  if(mPeekToken.IsString())
601  rData=mPeekToken.StringValue() + " " + rData;
602  // invalidate buffer
604 }
605 
606 // ReadSection()
607 void TokenReader::ReadSection(std::string& rSectionString) {
608  // record current level
609  int clevel = Level();
610  // setup token writer for destination // need a better interface here: provide string buffer
612  tw.Endl(true);
613  // token copy loop
614  while(true) {
615  // see whether we can grab and copy some character data
616  std::string cdata;
617  ReadCharacterData(cdata);
618  tw.WriteCharacterData(cdata);
619  // break end of my level
620  Token token;
621  if(!Peek(token)) break;
622  if(token.IsEnd() && !token.IsBegin() && Level()==clevel)
623  break;
624  // get and copy markup token
625  Get(token);
626  token.PreceedingSpace("n"); // explicit no formating
627  tw.Write(token);
628  }
629  // done
630  rSectionString=tw.Str();
631 }
632 
633 
634 // Line()
635 int TokenReader::Line(void) const {
636  return mLineCount;
637 }
638 
639 // FileLine()
640 std::string TokenReader::FileLine(void) const {
641  if(mFileName!="")
642  return "("+ mFileName + ":" + ToStringInteger(mLineCount) +")";
643  else
644  return "(#" + ToStringInteger(mLineCount) +")";
645 }
646 
647 } // namespace faudes

libFAUDES 2.28a --- 2016.09.13 --- c++ api documentation by doxygen