HooverChessUtils_PgnReader 0.9.0
Loading...
Searching...
No Matches
pgnscanner.h
Go to the documentation of this file.
1// Hoover Chess Utilities / PGN reader
2// Copyright (C) 2022-2025 Sami Kiminki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#ifndef HOOVER_CHESS_UTILS__PGN_READER__PGNSCANNER_H_INCLUDED
18#define HOOVER_CHESS_UTILS__PGN_READER__PGNSCANNER_H_INCLUDED
19
20#include "pgnreader-error.h"
21#include "pgnscannertokens.h"
22
23#include <FlexLexer.h>
24
25#include <algorithm>
26#include <array>
27#include <cinttypes>
28#include <cstring>
29#include <format>
30#include <string_view>
31
33{
34
37
39class PgnScanner : public ::yyFlexLexer
40{
41private:
42 const char *m_inputData;
43 std::size_t m_inputLeft;
46
47public:
52 PgnScanner(const char *inputData, std::size_t inputLen) noexcept :
53 yyFlexLexer { nullptr, nullptr },
54 m_inputData { inputData },
55 m_inputLeft { inputLen },
56 m_tokenInfo { }
57 {
58 }
59
60 PgnScanner(const PgnScanner &) = delete;
61 PgnScanner(PgnScanner &&) = delete;
62 PgnScanner & operator = (const PgnScanner &) & = delete;
64
76
90 {
92
93 if (m_curToken == PgnScannerToken::ERROR) [[unlikely]]
94 {
95 throw PgnError(
97 std::format(
98 "{}: '{}'",
99 getTokenInfo().error.errorMessage,
100 std::string_view { YYText(), static_cast<std::size_t>(YYLeng()) }));
101 }
102
103 return m_curToken;
104 }
105
117 inline PgnScannerToken getCurrentToken() const noexcept
118 {
119 return m_curToken;
120 }
121
125 inline const PgnScannerTokenInfo &getTokenInfo() const noexcept
126 {
127 return m_tokenInfo;
128 }
129
130#define C(tok) case tok: return #tok
131
136 static constexpr const char *scannerTokenToString(PgnScannerToken token) noexcept
137 {
138 switch (token)
139 {
140 C(NONE);
141 C(END_OF_FILE);
142 C(TAG_START);
143 C(TAG_KEY);
144 C(TAG_VALUE);
145 C(TAG_END);
148 C(MOVENUM);
149 C(MOVE_PAWN);
160 C(NAG);
164 C(COMMENT_END);
165 C(RESULT);
166 C(ERROR);
167 default:
168 assert(false);
169 return "???";
170 }
171 }
172#undef C
173
174protected:
175
/// Provides more input to the parent class (general flex version).
///
/// Thin adapter over the std::size_t overload: @c maxSize is converted to
/// std::size_t as-is, so a non-negative value is expected.
///
/// @param[out] buf      Destination buffer
/// @param[in]  maxSize  Capacity of @c buf in bytes
/// @return Number of bytes written to @c buf
int LexerInput(char *buf, int maxSize)
{
    const std::size_t capacity { static_cast<std::size_t>(maxSize) };

    return static_cast<int>(LexerInput(buf, capacity));
}
187
195 std::size_t LexerInput(char *buf, std::size_t maxSize)
196 {
197 const std::size_t copySize { std::min(maxSize, m_inputLeft) };
198
199 std::memcpy(buf, m_inputData, copySize);
200 m_inputData += copySize;
201 m_inputLeft -= copySize;
202
203 return copySize;
204 }
205
206private:
211
/// Ascii to unsigned integer conversion.
///
/// Converts the decimal digits in the range [@c str, @c end) to an unsigned
/// integer.
///
/// @tparam RetType     Unsigned integer type of the result
/// @tparam digitsOnly  When @c true, every character in the range is taken
///                     to be a decimal digit and no per-character check is
///                     made. When @c false, conversion stops at the first
///                     character that is not a decimal digit.
/// @param[in] str        First character to convert
/// @param[in] end        One past the last character to convert
/// @param[in] tokenType  Token name used in the overflow error message
/// @return Converted value; 0 for an empty range
/// @throws PgnError  The value does not fit in @c RetType
template <typename RetType, bool digitsOnly>
RetType asciiToUnsigned(const char *str, const char *end, const char *tokenType)
{
    // Largest value representable in the (unsigned) result type.
    constexpr RetType maxValue { static_cast<RetType>(-1) };

    RetType ret { };

    for (; str != end; ++str)
    {
        const char c { *str };

        if constexpr (!digitsOnly)
        {
            if (static_cast<unsigned char>(c - '0') > 9U)
                break;
        }

        const RetType digit { static_cast<RetType>(c - '0') };

        // Check the bound before multiplying. Comparing the wrapped result
        // against the previous value misses overflows where the wrapped
        // value is still larger (for example "300" into an 8-bit type).
        if (ret > static_cast<RetType>((maxValue - digit) / 10U))
            throw PgnError(
                std::format("Integer overflow when tokenizing {}", tokenType));

        ret = static_cast<RetType>(ret * 10U + digit);
    }

    return ret;
}
258
264 static constexpr inline Square charCoordToSq(char colChar, char rowChar) noexcept
265 {
266 const std::uint8_t col { static_cast<std::uint8_t>(colChar - 'a') };
267 const std::uint8_t row { static_cast<std::uint8_t>(rowChar - '1') };
268
269 return makeSquare(col, row);
270 }
271
276 static constexpr inline SquareSet colCharToMask(char colChar) noexcept
277 {
278 const std::uint8_t col { static_cast<std::uint8_t>(colChar - 'a') };
279
280 return SquareSet::column(col);
281 }
282
287 static constexpr inline SquareSet rowCharToMask(char rowChar) noexcept
288 {
289 const std::uint8_t row { static_cast<std::uint8_t>(rowChar - '1') };
290
291 return SquareSet::row(row);
292 }
293
294 static constexpr std::array<Piece, 32> ctCharToPieceTable {{
295 Piece::NONE, // 0
296 Piece::NONE, // 1
297 Piece::BISHOP, // 2
298 Piece::NONE, // 3
299 Piece::NONE, // 4
300 Piece::NONE, // 5
301 Piece::NONE, // 6
302 Piece::NONE, // 7
303 Piece::NONE, // 8
304 Piece::NONE, // 9
305 Piece::NONE, // 10
306 Piece::NONE, // 11
307 Piece::NONE, // 12
308 Piece::NONE, // 13
309 Piece::KNIGHT, // 14
310 Piece::NONE, // 15
311 Piece::NONE, // 16
312 Piece::QUEEN, // 17
313 Piece::ROOK, // 18
314 Piece::NONE, // 19
315 Piece::NONE, // 21
316 Piece::NONE, // 22
317 Piece::NONE, // 23
318 Piece::NONE, // 24
319 Piece::NONE, // 25
320 Piece::NONE, // 26
321 Piece::NONE, // 27
322 Piece::NONE, // 28
323 Piece::NONE, // 29
324 Piece::NONE, // 30
325 Piece::NONE, // 31
326 }};
327
361
362 // expect char to be one of: 'B', 'R', 'N', 'Q'
363 static constexpr inline Piece getPieceForChar(char c) noexcept
364 {
365 return ctCharToPieceTable[static_cast<std::uint8_t>(c) % ctCharToPieceTable.size()];
366 }
367
368 // expect char to be one of: 'B', 'R', 'N', 'Q', 'K'
369 static constexpr inline PgnScannerToken getMovePieceScannerTokenForChar(char c) noexcept
370 {
371 return ctCharToMovePieceTable[static_cast<std::uint8_t>(c) % ctCharToMovePieceTable.size()];
372 }
373
374 inline void setTokenInfo_MOVENUM(const char *str, const char *end);
375 inline void setTokenInfo_PAWN_MOVE(SquareSet srcMask, Square dstSq, Piece promoPiece);
376 inline void setTokenInfo_PIECE_MOVE(SquareSet srcMask, bool capture, Square dstSq);
377 inline void setTokenInfo_NAG(std::uint8_t nag);
378 inline void setTokenInfo_RESULT(PgnResult result);
379 inline void setTokenInfo_ERROR(const char *errorMessage);
380
381 inline PgnScannerToken tokenizePieceMove(std::string_view str);
382 inline PgnScannerToken tokenizeUnusualPawnMove(std::string_view str);
383};
384
386
387}
388
389#endif
PGN error exception.
Definition pgnreader-error.h:70
The PGN scanner (lexer)
Definition pgnscanner.h:40
static constexpr SquareSet colCharToMask(char colChar) noexcept
Translates column coordinate (file) to a square set.
Definition pgnscanner.h:276
PgnScanner(const char *inputData, std::size_t inputLen) noexcept
Constructor.
Definition pgnscanner.h:52
RetType asciiToUnsigned(const char *str, const char *end, const char *tokenType)
Ascii to unsigned integer conversion.
Definition pgnscanner.h:229
static constexpr PgnScannerToken getMovePieceScannerTokenForChar(char c) noexcept
Definition pgnscanner.h:369
static constexpr std::array< PgnScannerToken, 32 > ctCharToMovePieceTable
Definition pgnscanner.h:328
const char * m_inputData
Definition pgnscanner.h:42
void setTokenInfo_RESULT(PgnResult result)
PgnScannerToken m_curToken
Definition pgnscanner.h:44
PgnScannerToken tokenizeUnusualPawnMove(std::string_view str)
PgnScannerToken nextTokenNoThrowOnErrorToken()
Scans input and returns the next token. On bad input, error token is returned instead of throwing an ...
Definition pgnscanner.h:71
PgnScannerToken tokenizePieceMove(std::string_view str)
void setTokenInfo_ERROR(const char *errorMessage)
int LexerInput(char *buf, int maxSize)
Provides more input to the parent class (general flex version)
Definition pgnscanner.h:183
PgnScanner & operator=(const PgnScanner &) &=delete
void setTokenInfo_MOVENUM(const char *str, const char *end)
void setTokenInfo_PIECE_MOVE(SquareSet srcMask, bool capture, Square dstSq)
PgnScannerTokenInfo m_tokenInfo
Definition pgnscanner.h:45
static constexpr SquareSet rowCharToMask(char rowChar) noexcept
Translates row coordinate (rank) to a square set.
Definition pgnscanner.h:287
const PgnScannerTokenInfo & getTokenInfo() const noexcept
Returns additional information on the token.
Definition pgnscanner.h:125
static constexpr Piece getPieceForChar(char c) noexcept
Definition pgnscanner.h:363
std::size_t m_inputLeft
Definition pgnscanner.h:43
PgnScannerToken getCurrentToken() const noexcept
Returns the previously scanned token.
Definition pgnscanner.h:117
PgnScannerToken nextToken()
Scans input and returns the next token.
Definition pgnscanner.h:89
static constexpr const char * scannerTokenToString(PgnScannerToken token) noexcept
Returns a string for a scanner token.
Definition pgnscanner.h:136
void setTokenInfo_PAWN_MOVE(SquareSet srcMask, Square dstSq, Piece promoPiece)
static constexpr std::array< Piece, 32 > ctCharToPieceTable
Definition pgnscanner.h:294
static constexpr Square charCoordToSq(char colChar, char rowChar) noexcept
Translates character coordinates to a square.
Definition pgnscanner.h:264
PgnScanner(const PgnScanner &)=delete
PgnScannerToken yylexex()
The generated lexer.
std::size_t LexerInput(char *buf, std::size_t maxSize)
Provides more input to the parent class (Apple flex version)
Definition pgnscanner.h:195
Set of squares. Implemented using a bit-mask.
Definition chessboard-types-squareset.h:35
static constexpr SquareSet column(RowColumn col) noexcept
Returns a set of squares in column number col.
Definition chessboard-types-squareset.h:471
static constexpr SquareSet row(RowColumn row) noexcept
Returns a set of squares in row number row.
Definition chessboard-types-squareset.h:485
PgnResult
Game result.
Definition pgnreader-types.h:32
Piece
Named piece.
Definition chessboard-types.h:204
Square
Named square.
Definition chessboard-types.h:122
constexpr Square makeSquare(RowColumn col, RowColumn row) noexcept
Constructs a square from column and row.
Definition chessboard-types.h:316
@ BAD_CHARACTER
Unexpected character in PGN input (tokenizer error)
@ NONE
Value representing no piece.
PgnScannerToken
PGN scanner token.
Definition pgnscannertokens.h:37
@ VARIATION_START
Variation start ('(')
Definition pgnscannertokens.h:57
@ VARIATION_END
Variation end ('(')
Definition pgnscannertokens.h:60
@ COMMENT_TEXT
Block comment text line OR single line comment.
Definition pgnscannertokens.h:127
@ TAG_START
PGN tag start ('[')
Definition pgnscannertokens.h:45
@ MOVENUM
Move number.
Definition pgnscannertokens.h:65
@ MOVE_PIECE_ROOK
Rook move.
Definition pgnscannertokens.h:100
@ ERROR
Tokenizer error.
Definition pgnscannertokens.h:146
@ COMMENT_NEWLINE
New line within a block comment.
Definition pgnscannertokens.h:130
@ MOVE_PIECE_KNIGHT
Knight move.
Definition pgnscannertokens.h:90
@ MOVE_PIECE_BISHOP
Bishop move.
Definition pgnscannertokens.h:95
@ MOVE_PAWN
Pawn advance (non-promoting)
Definition pgnscannertokens.h:70
@ TAG_KEY
PGN tag key.
Definition pgnscannertokens.h:48
@ MOVE_PAWN_PROMO
Pawn advance (promoting)
Definition pgnscannertokens.h:80
@ TAG_END
PGN tag end (']')
Definition pgnscannertokens.h:54
@ NAG
Numeric annotation glyph.
Definition pgnscannertokens.h:121
@ MOVE_SHORT_CASTLE
Short castling move.
Definition pgnscannertokens.h:113
@ MOVE_PAWN_CAPTURE
Pawn capture (non-promoting)
Definition pgnscannertokens.h:75
@ NONE
Null token (placeholder only)
Definition pgnscannertokens.h:39
@ COMMENT_END
Block comment end.
Definition pgnscannertokens.h:133
@ END_OF_FILE
End of file.
Definition pgnscannertokens.h:42
@ RESULT
PGN game result (terminator)
Definition pgnscannertokens.h:138
@ MOVE_PIECE_KING
King move.
Definition pgnscannertokens.h:110
@ TAG_VALUE
PGN tag value.
Definition pgnscannertokens.h:51
@ MOVE_LONG_CASTLE
Long castling move.
Definition pgnscannertokens.h:116
@ MOVE_PAWN_PROMO_CAPTURE
Pawn capture (promoting)
Definition pgnscannertokens.h:85
@ COMMENT_START
Block comment start.
Definition pgnscannertokens.h:124
@ MOVE_PIECE_QUEEN
Queen move.
Definition pgnscannertokens.h:105
Definition chessboard-types-squareset.h:30
#define C(tok)
Definition pgnscanner.h:130
Union of all additional scanner information. The applicable union member depends on the PGN scanner toke...
Definition pgnscannertokens.h:225