HooverChessUtils_PgnReader 0.9.0
Loading...
Searching...
No Matches
pgnparser.h
Go to the documentation of this file.
1// Hoover Chess Utilities / PGN reader
2// Copyright (C) 2022-2025 Sami Kiminki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#ifndef HOOVER_CHESS_UTILS__PGN_READER__PGNPARSER_H_INCLUDED
18#define HOOVER_CHESS_UTILS__PGN_READER__PGNPARSER_H_INCLUDED
19
20
21#include "chessboard.h"
22#include "pgnreader-types.h"
23#include "pgnscanner.h"
24#include "stringbuilder.h"
25
26#include <array>
27#include <bit>
28#include <cstring>
29#include <format>
30#include <iterator>
31#include <memory>
32#include <string>
33#include <vector>
34
36{
37
40
74{
75public:
77
/// Called on game start before anything else. The null handler does nothing.
79 void gameStart() { }
80
82
/// Called on PGN tag pair.
///
/// @param key    Key of the tag pair
/// @param value  Value of the tag pair
///
/// The null handler ignores both arguments.
87 void pgnTag(const std::string_view &key, const std::string_view &value)
88 {
// The void casts only discard the parameters; they have no other effect.
89 static_cast<void>(key);
90 static_cast<void>(value);
91 }
92
/// Called between the end of the tag pair section and the beginning of the
/// move text section. The null handler does nothing.
95 void moveTextSection() { }
96
98
/// Called on comment.
///
/// @param str  Comment text
///
/// The null handler ignores the text.
111 void comment(const std::string_view &str)
112 {
// Discard the parameter; nothing else happens here.
113 static_cast<void>(str);
114 }
115
/// Called on numeric annotation glyph.
///
/// @param nagNum  Glyph number
///
/// The null handler ignores the glyph.
121 void nag(std::uint8_t nagNum)
122 {
// Discard the parameter; nothing else happens here.
123 static_cast<void>(nagNum);
124 }
125
/// Called on move number.
///
/// @param moveNum  Move number
///
/// The null handler ignores the number.
132 void moveNum(std::uint32_t moveNum)
133 {
// Discard the parameter; nothing else happens here.
134 static_cast<void>(moveNum);
135 }
136
/// Pawn advancing move (non-promoting).
///
/// @param srcMask  Allowed source squares per the move specification
/// @param dst      Destination square of the move
///
/// The null handler ignores the move.
147 void movePawn(SquareSet srcMask, Square dst)
148 {
// Discard the parameters; nothing else happens here.
149 static_cast<void>(srcMask);
150 static_cast<void>(dst);
151 }
152
164 {
165 static_cast<void>(srcMask);
166 static_cast<void>(dst);
167 }
168
/// Pawn advancing move (promoting).
///
/// @param srcMask  Allowed source squares per the move specification
/// @param dst      Destination square of the move
/// @param promo    Promotion piece
///
/// The null handler ignores the move.
180 void movePawnPromo(SquareSet srcMask, Square dst, Piece promo)
181 {
// Discard the parameters; nothing else happens here.
182 static_cast<void>(srcMask);
183 static_cast<void>(dst);
184 static_cast<void>(promo);
185 }
186
/// Pawn capturing move (promoting).
///
/// @param srcMask  Allowed source squares per the move specification
/// @param dst      Destination square of the move
/// @param promo    Promotion piece
///
/// The null handler ignores the move.
198 void movePawnPromoCapture(SquareSet srcMask, Square dst, Piece promo)
199 {
// Discard the parameters; nothing else happens here.
200 static_cast<void>(srcMask);
201 static_cast<void>(dst);
202 static_cast<void>(promo);
203 }
204
/// Knight move.
///
/// @param srcMask  Allowed source squares per the move specification
/// @param dst      Destination square of the move
/// @param capture  Whether move is a capture move
///
/// The null handler ignores the move.
216 void moveKnight(SquareSet srcMask, Square dst, bool capture)
217 {
// Discard the parameters; nothing else happens here.
218 static_cast<void>(srcMask);
219 static_cast<void>(dst);
220 static_cast<void>(capture);
221 }
222
/// Bishop move.
///
/// @param srcMask  Allowed source squares per the move specification
/// @param dst      Destination square of the move
/// @param capture  Whether move is a capture move
///
/// The null handler ignores the move.
234 void moveBishop(SquareSet srcMask, Square dst, bool capture)
235 {
// Discard the parameters; nothing else happens here.
236 static_cast<void>(srcMask);
237 static_cast<void>(dst);
238 static_cast<void>(capture);
239 }
240
/// Rook move.
///
/// @param srcMask  Allowed source squares per the move specification
/// @param dst      Destination square of the move
/// @param capture  Whether move is a capture move
///
/// The null handler ignores the move.
252 void moveRook(SquareSet srcMask, Square dst, bool capture)
253 {
// Discard the parameters; nothing else happens here.
254 static_cast<void>(srcMask);
255 static_cast<void>(dst);
256 static_cast<void>(capture);
257 }
258
/// Queen move.
///
/// @param srcMask  Allowed source squares per the move specification
/// @param dst      Destination square of the move
/// @param capture  Whether move is a capture move
///
/// The null handler ignores the move.
270 void moveQueen(SquareSet srcMask, Square dst, bool capture)
271 {
// Discard the parameters; nothing else happens here.
272 static_cast<void>(srcMask);
273 static_cast<void>(dst);
274 static_cast<void>(capture);
275 }
276
/// King move.
///
/// @param srcMask  Allowed source squares per the move specification
/// @param dst      Destination square of the move
/// @param capture  Whether move is a capture move
///
/// The null handler ignores the move.
288 void moveKing(SquareSet srcMask, Square dst, bool capture)
289 {
// Discard the parameters; nothing else happens here.
290 static_cast<void>(srcMask);
291 static_cast<void>(dst);
292 static_cast<void>(capture);
293 }
294
297 {
298 }
299
302 {
303 }
304
307 {
308 }
309
312 {
313 }
314
319 {
320 static_cast<void>(result);
321 }
322
/// Called at the end of the PGN after everything else. The null handler does
/// nothing.
324 void endOfPGN() { }
325};
326
473template <typename T_ActionHandler>
475{
476private:
478 T_ActionHandler &m_actionHandler;
481
483 std::vector<std::string> m_pendingComments { };
484
486 PgnErrorCode errorCode,
487 std::uint32_t expectedTokenMask,
488 PgnScannerToken token)
489 {
490 std::string expectedTokens { };
491
492 while (expectedTokenMask != 0U)
493 {
494 if (!expectedTokens.empty())
495 expectedTokens += " | ";
496
497 expectedTokens += PgnScanner::scannerTokenToString(PgnScannerToken { static_cast<std::uint8_t>(std::countr_zero(expectedTokenMask)) });
498 expectedTokenMask = expectedTokenMask & (expectedTokenMask - 1U);
499 }
500
501 throw PgnError(
502 errorCode,
503 std::format(
504 "Expected token {} but got: {} ({})",
505 expectedTokens, PgnScanner::scannerTokenToString(token), static_cast<std::uint8_t>(token)));
506 }
507
509 {
513 if (token != PgnScannerToken::TAG_KEY) [[unlikely]]
517 token);
518
519 m_strBuilder.appendString(m_scanner.YYText(), m_scanner.YYLeng());
520
521 token = m_scanner.nextToken();
522 if (token != PgnScannerToken::TAG_VALUE) [[unlikely]]
526 token);
527
528 // TAG value string has format "...", so we'll crop the first and the
529 // last chars away. In addition, we need to parse escapes
530 bool escape = false;
531 for (const char c : std::string_view { m_scanner.YYText() + 1, static_cast<std::size_t>(m_scanner.YYLeng() - 2) })
532 {
533 if (escape)
534 {
536 escape = false;
537 }
538 else if (c != '\\')
539 {
541 }
542 else
543 escape = true;
544 }
545
546 m_actionHandler.pgnTag(
549
550 token = m_scanner.nextToken();
551 if (token != PgnScannerToken::TAG_END) [[unlikely]]
555 token);
556 }
557
559 {
560 bool variationAllowed { };
561
562 while (true)
563 {
564 switch (token)
565 {
568 token = m_scanner.nextToken();
569 break;
570
573 token = m_scanner.nextToken();
574 break;
575
578 token = m_scanner.nextToken();
579 break;
580
583 token = parseNagsAfterMove();
584 variationAllowed = true;
585 break;
586
589 token = parseNagsAfterMove();
590 variationAllowed = true;
591 break;
592
595 token = parseNagsAfterMove();
596 variationAllowed = true;
597 break;
598
601 token = parseNagsAfterMove();
602 variationAllowed = true;
603 break;
604
607 token = parseNagsAfterMove();
608 variationAllowed = true;
609 break;
610
613 token = parseNagsAfterMove();
614 variationAllowed = true;
615 break;
616
619 token = parseNagsAfterMove();
620 variationAllowed = true;
621 break;
622
625 token = parseNagsAfterMove();
626 variationAllowed = true;
627 break;
628
631 token = parseNagsAfterMove();
632 variationAllowed = true;
633 break;
634
637 token = parseNagsAfterMove();
638 variationAllowed = true;
639 break;
640
643 token = parseNagsAfterMove();
644 variationAllowed = true;
645 break;
646
648 if (variationAllowed) [[likely]]
649 {
650 token = parseVariation();
651 break;
652 }
653 else [[unlikely]]
654 {
655 throw PgnError(
657 std::format(
658 "VARIATION_START not allowed in a line before move"));
659 }
660
661 default:
662 return token;
663 }
664 }
665 }
666
668 {
669 m_actionHandler.variationStart();
670
672 token = parseLine(token);
673
674 if (token != PgnScannerToken::VARIATION_END) [[unlikely]]
675 {
679 token);
680 }
681
682 m_actionHandler.variationEnd();
683
684 return m_scanner.nextToken();
685 }
686
/// Forwards a pawn advance (non-promoting) to the action handler.
///
/// @param move  Pawn move token info; its source square mask and destination
///              square are passed on to T_ActionHandler::movePawn()
687 inline void handleMovePawn(const PgnScannerTokenInfo_PAWN_MOVE &move) const
688 {
689 m_actionHandler.movePawn(
690 move.srcMask,
691 move.dstSq);
692 }
693
695 {
696 m_actionHandler.movePawnCapture(
697 move.srcMask,
698 move.dstSq);
699 }
700
702 {
703 m_actionHandler.movePawnPromo(
704 move.srcMask,
705 move.dstSq,
706 move.promoPiece);
707 }
708
710 {
711 m_actionHandler.movePawnPromoCapture(
712 move.srcMask,
713 move.dstSq,
714 move.promoPiece);
715 }
716
718 {
719 m_actionHandler.moveKnight(move.srcMask, move.dstSq, move.capture);
720 }
721
723 {
724 m_actionHandler.moveBishop(move.srcMask, move.dstSq, move.capture);
725 }
726
/// Forwards a rook move to the action handler.
///
/// @param move  Piece move token info; its source square mask, destination
///              square, and capture flag are passed on to
///              T_ActionHandler::moveRook()
727 inline void handleMoveRook(const PgnScannerTokenInfo_PIECE_MOVE &move) const
728 {
729 m_actionHandler.moveRook(move.srcMask, move.dstSq, move.capture);
730 }
731
/// Forwards a queen move to the action handler.
///
/// @param move  Piece move token info; its source square mask, destination
///              square, and capture flag are passed on to
///              T_ActionHandler::moveQueen()
732 inline void handleMoveQueen(const PgnScannerTokenInfo_PIECE_MOVE &move) const
733 {
734 m_actionHandler.moveQueen(move.srcMask, move.dstSq, move.capture);
735 }
736
/// Forwards a king move to the action handler.
///
/// @param move  Piece move token info; its source square mask, destination
///              square, and capture flag are passed on to
///              T_ActionHandler::moveKing()
737 inline void handleMoveKing(const PgnScannerTokenInfo_PIECE_MOVE &move) const
738 {
739 m_actionHandler.moveKing(move.srcMask, move.dstSq, move.capture);
740 }
741
/// Forwards a short castling move to the action handler
/// (T_ActionHandler::moveShortCastle()). Takes no move details.
742 inline void handleMoveShortCastle() const
743 {
744 m_actionHandler.moveShortCastle();
745 }
746
/// Forwards a long castling move to the action handler
/// (T_ActionHandler::moveLongCastle()). Takes no move details.
747 inline void handleMoveLongCastle() const
748 {
749 m_actionHandler.moveLongCastle();
750 }
751
753 {
754 m_actionHandler.moveNum(moveNum.num);
755 }
756
758 {
760
761 while (token == PgnScannerToken::NAG)
762 {
764 token = m_scanner.nextToken();
765 }
766
767 return token;
768 }
769
771 {
772 while (true)
773 {
774 switch (token)
775 {
778 token = m_scanner.nextToken();
779 break;
780
783 token = m_scanner.nextToken();
784 break;
785
786 default:
787 return token;
788 }
789 }
790 }
791
793 {
795
796 std::size_t pendingNewlines { };
797
798 while (true)
799 {
800 const PgnScannerToken token { m_scanner.nextToken() };
801 switch (token)
802 {
804 if (!m_strBuilder.isEmpty())
805 {
806 while (pendingNewlines > 0U)
807 {
809 --pendingNewlines;
810 }
811 }
812 pendingNewlines = 0U;
813 m_strBuilder.appendString(m_scanner.YYText(), m_scanner.YYLeng());
814 break;
816 ++pendingNewlines;
817 break;
821 else
822 m_pendingComments.push_back(std::string { m_strBuilder.getStringView() });
823
824 return;
825
826 default: [[unlikely]]
832 token);
833 }
834 }
835 }
836
838 {
839 const char *commentStart { m_scanner.YYText() };
840 const char *commentEnd { commentStart + static_cast<std::size_t>(m_scanner.YYLeng()) };
841
842 ++commentStart; // eat the ';'
843
844 // eat the white spaces from the beginning
845 while (commentStart != commentEnd)
846 {
847 switch (*commentStart)
848 {
849 case ' ':
850 case '\t':
851 case '\v':
852 ++commentStart;
853 continue;
854
855 default:
856 break;
857 }
858 break;
859 }
860
861 // eat the white spaces from the end
862 while (commentStart != commentEnd)
863 {
864 switch (*(commentEnd - 1U))
865 {
866 case ' ':
867 case '\t':
868 case '\v':
869 --commentEnd;
870 continue;
871
872 default:
873 break;
874 }
875 break;
876 }
877
878 if (commentStart != commentEnd)
879 {
881 m_actionHandler.comment(std::string_view { commentStart, commentEnd });
882 else
883 m_pendingComments.push_back(std::string { commentStart, commentEnd });
884 }
885 }
886
888 {
889 for (const auto &str : m_pendingComments)
890 m_actionHandler.comment(str);
891
892 m_pendingComments.clear();
893 }
894
895public:
/// Constructor.
///
/// @param scanner        The PGN scanner (lexer) that supplies the tokens
/// @param actionHandler  Receiver of the semantic action callbacks
///
/// Both arguments are bound to reference members rather than copied.
/// NOTE(review): they presumably must outlive the parser -- confirm against
/// callers.
900 PgnParser(PgnScanner &scanner, T_ActionHandler &actionHandler) :
901 m_scanner { scanner },
902 m_actionHandler { actionHandler }
903 {
904 }
905
// Copy construction, move construction, and copy assignment are deleted.
906 PgnParser(const PgnParser &) = delete;
907 PgnParser(PgnParser &&) = delete;
908 PgnParser &operator = (const PgnParser &) & = delete;
910
911 void parse()
912 {
913 try
914 {
915 // every full iteration parses a game
916 while (true)
917 {
919
920 // PGN ==> GAME* COMMENT* <end_of_file>
921 while (true)
922 {
923 if (token == PgnScannerToken::END_OF_FILE)
924 {
926 m_actionHandler.endOfPGN();
927 return;
928 }
929 else if (token == PgnScannerToken::COMMENT_START)
931 else if (token == PgnScannerToken::COMMENT_TEXT)
933 else
934 break;
935
936 token = m_scanner.nextToken();
937 }
938
939 // GAME = TAGPAIRS MOVETEXT
940 m_actionHandler.gameStart();
941
942 // TAGPAIRS = (COMMENT | TAGPAIR)*
943 while (true)
944 {
945 if (token == PgnScannerToken::TAG_START)
946 {
948 parseTagPair();
949 }
950 else if (token == PgnScannerToken::COMMENT_START)
952 else if (token == PgnScannerToken::COMMENT_TEXT)
954 else
955 break;
956
957 token = m_scanner.nextToken();
958 }
959
960 // MOVETEXT
961 m_actionHandler.moveTextSection();
963 m_inMoveTextSection = true;
964
965 token = parseLine(token);
966
967 if (token != PgnScannerToken::RESULT) [[unlikely]]
971 token);
972
974 m_inMoveTextSection = false;
975 }
976 }
977 catch (const PgnError &ex)
978 {
979 // add position info in the exception
980 throw PgnError(m_scanner, ex);
981 }
982 }
983};
984
986
987}
988
989#endif
PGN error exception.
Definition pgnreader-error.h:70
PGN parser null semantic actions. This is useful only for testing and documentation purposes.
Definition pgnparser.h:74
void moveNum(std::uint32_t moveNum)
Called on move number.
Definition pgnparser.h:132
void moveTextSection()
Called between the end of the tag pair section and the beginning of the move text section.
Definition pgnparser.h:95
void movePawn(SquareSet srcMask, Square dst)
Pawn advancing move (non-promoting)
Definition pgnparser.h:147
void gameStart()
Called on game start before anything else.
Definition pgnparser.h:79
void moveQueen(SquareSet srcMask, Square dst, bool capture)
Queen move.
Definition pgnparser.h:270
void gameTerminated(PgnResult result)
Called on game end.
Definition pgnparser.h:318
void moveShortCastle()
Short castling move.
Definition pgnparser.h:296
void endOfPGN()
Called at the end of the PGN after everything else.
Definition pgnparser.h:324
void movePawnPromoCapture(SquareSet srcMask, Square dst, Piece promo)
Pawn capturing move (promoting)
Definition pgnparser.h:198
void variationEnd()
End of a recursive annotation variation (RAV)
Definition pgnparser.h:311
void comment(const std::string_view &str)
Called on comment.
Definition pgnparser.h:111
void movePawnCapture(SquareSet srcMask, Square dst)
Pawn capturing move (non-promoting)
Definition pgnparser.h:163
void moveLongCastle()
Long castling move.
Definition pgnparser.h:301
void pgnTag(const std::string_view &key, const std::string_view &value)
Called on PGN tag pair.
Definition pgnparser.h:87
void moveKnight(SquareSet srcMask, Square dst, bool capture)
Knight move.
Definition pgnparser.h:216
void variationStart()
Beginning of a recursive annotation variation (RAV)
Definition pgnparser.h:306
void nag(std::uint8_t nagNum)
Called on numeric annotation glyph.
Definition pgnparser.h:121
void moveRook(SquareSet srcMask, Square dst, bool capture)
Rook move.
Definition pgnparser.h:252
void movePawnPromo(SquareSet srcMask, Square dst, Piece promo)
Pawn advancing move (promoting)
Definition pgnparser.h:180
void moveBishop(SquareSet srcMask, Square dst, bool capture)
Bishop move.
Definition pgnparser.h:234
void moveKing(SquareSet srcMask, Square dst, bool capture)
King move.
Definition pgnparser.h:288
The PGN parser.
Definition pgnparser.h:475
void parse()
Definition pgnparser.h:911
void handleMovePawnPromo(const PgnScannerTokenInfo_PAWN_MOVE &move) const
Definition pgnparser.h:701
PgnScannerToken parseLine(PgnScannerToken token)
Definition pgnparser.h:558
StringBuilder m_strBuilder2
Definition pgnparser.h:480
void parseCommentBlock()
Definition pgnparser.h:792
void parseTagPair()
Definition pgnparser.h:508
void flushPendingComments()
Definition pgnparser.h:887
void handleMoveKing(const PgnScannerTokenInfo_PIECE_MOVE &move) const
Definition pgnparser.h:737
bool m_inMoveTextSection
Definition pgnparser.h:482
PgnParser(const PgnParser &)=delete
void unexpectedTokenError(PgnErrorCode errorCode, std::uint32_t expectedTokenMask, PgnScannerToken token)
Definition pgnparser.h:485
void handleMoveRook(const PgnScannerTokenInfo_PIECE_MOVE &move) const
Definition pgnparser.h:727
void handleMoveShortCastle() const
Definition pgnparser.h:742
PgnParser & operator=(const PgnParser &) &=delete
PgnScanner & m_scanner
Definition pgnparser.h:477
void handleMoveQueen(const PgnScannerTokenInfo_PIECE_MOVE &move) const
Definition pgnparser.h:732
PgnScannerToken parseVariation()
Definition pgnparser.h:667
void handleMovePawnPromoCapture(const PgnScannerTokenInfo_PAWN_MOVE &move) const
Definition pgnparser.h:709
void parseSingleLineComment()
Definition pgnparser.h:837
void handleMovePawnCapture(const PgnScannerTokenInfo_PAWN_MOVE &move) const
Definition pgnparser.h:694
PgnParser(PgnScanner &scanner, T_ActionHandler &actionHandler)
Constructor.
Definition pgnparser.h:900
T_ActionHandler & m_actionHandler
Definition pgnparser.h:478
void handleMoveKnight(const PgnScannerTokenInfo_PIECE_MOVE &move) const
Definition pgnparser.h:717
void handleMoveLongCastle() const
Definition pgnparser.h:747
void handleMovePawn(const PgnScannerTokenInfo_PAWN_MOVE &move) const
Definition pgnparser.h:687
void handleMoveNum(const PgnScannerTokenInfo_MOVENUM &moveNum) const
Definition pgnparser.h:752
void handleMoveBishop(const PgnScannerTokenInfo_PIECE_MOVE &move) const
Definition pgnparser.h:722
StringBuilder m_strBuilder
Definition pgnparser.h:479
PgnScannerToken parseNagsAfterMove()
Definition pgnparser.h:757
PgnScannerToken parseComments(PgnScannerToken token)
Definition pgnparser.h:770
std::vector< std::string > m_pendingComments
Definition pgnparser.h:483
The PGN scanner (lexer)
Definition pgnscanner.h:40
const PgnScannerTokenInfo & getTokenInfo() const noexcept
Returns additional information on the token.
Definition pgnscanner.h:125
PgnScannerToken nextToken()
Scans input and returns the next token.
Definition pgnscanner.h:89
static constexpr const char * scannerTokenToString(PgnScannerToken token) noexcept
Returns a string for a scanner token.
Definition pgnscanner.h:136
Set of squares. Implemented using a bit-mask.
Definition chessboard-types-squareset.h:35
void appendString(const char *str, std::size_t len)
void clear() noexcept
Definition stringbuilder.h:59
std::string_view getStringView() const noexcept
Definition stringbuilder.h:54
bool isEmpty() const noexcept
Definition stringbuilder.h:49
PgnErrorCode
Error code.
Definition pgnreader-error.h:35
PgnResult
Game result.
Definition pgnreader-types.h:32
Piece
Named piece.
Definition chessboard-types.h:204
Square
Named square.
Definition chessboard-types.h:122
@ BAD_PGN_TAG
PGN tag parsing failed (parser error)
@ UNEXPECTED_TOKEN
Unexpected (bad) token (parser error)
constexpr std::uint32_t pgnScannerTokenToMaskBit(PgnScannerToken token) noexcept
Definition pgnscannertokens.h:149
PgnScannerToken
PGN scanner token.
Definition pgnscannertokens.h:37
@ VARIATION_START
Variation start ('(')
Definition pgnscannertokens.h:57
@ VARIATION_END
Variation end (')')
Definition pgnscannertokens.h:60
@ COMMENT_TEXT
Block comment text line OR single line comment.
Definition pgnscannertokens.h:127
@ TAG_START
PGN tag start ('[')
Definition pgnscannertokens.h:45
@ MOVENUM
Move number.
Definition pgnscannertokens.h:65
@ MOVE_PIECE_ROOK
Rook move.
Definition pgnscannertokens.h:100
@ COMMENT_NEWLINE
New line within a block comment.
Definition pgnscannertokens.h:130
@ MOVE_PIECE_KNIGHT
Knight move.
Definition pgnscannertokens.h:90
@ MOVE_PIECE_BISHOP
Bishop move.
Definition pgnscannertokens.h:95
@ MOVE_PAWN
Pawn advance (non-promoting)
Definition pgnscannertokens.h:70
@ TAG_KEY
PGN tag key.
Definition pgnscannertokens.h:48
@ MOVE_PAWN_PROMO
Pawn advance (promoting)
Definition pgnscannertokens.h:80
@ TAG_END
PGN tag end (']')
Definition pgnscannertokens.h:54
@ NAG
Numeric annotation glyph.
Definition pgnscannertokens.h:121
@ MOVE_SHORT_CASTLE
Short castling move.
Definition pgnscannertokens.h:113
@ MOVE_PAWN_CAPTURE
Pawn capture (non-promoting)
Definition pgnscannertokens.h:75
@ COMMENT_END
Block comment end.
Definition pgnscannertokens.h:133
@ END_OF_FILE
End of file.
Definition pgnscannertokens.h:42
@ RESULT
PGN game result (terminator)
Definition pgnscannertokens.h:138
@ MOVE_PIECE_KING
King move.
Definition pgnscannertokens.h:110
@ TAG_VALUE
PGN tag value.
Definition pgnscannertokens.h:51
@ MOVE_LONG_CASTLE
Long castling move.
Definition pgnscannertokens.h:116
@ MOVE_PAWN_PROMO_CAPTURE
Pawn capture (promoting)
Definition pgnscannertokens.h:85
@ COMMENT_START
Block comment start.
Definition pgnscannertokens.h:124
@ MOVE_PIECE_QUEEN
Queen move.
Definition pgnscannertokens.h:105
Definition chessboard-types-squareset.h:30
Additional token info for move number token.
Definition pgnscannertokens.h:192
std::uint32_t num
Move number.
Definition pgnscannertokens.h:194
std::uint8_t nag
Glyph number.
Definition pgnscannertokens.h:203
Additional token info for pawn move.
Definition pgnscannertokens.h:156
Piece promoPiece
Promotion piece for PgnScannerToken::MOVE_PAWN_PROMO and PgnScannerToken::MOVE_PAWN_PROMO_CAPTURE.
Definition pgnscannertokens.h:169
Square dstSq
Destination square of the move.
Definition pgnscannertokens.h:166
SquareSet srcMask
Allowed source squares per the move specification.
Definition pgnscannertokens.h:163
Additional token info for piece move.
Definition pgnscannertokens.h:174
bool capture
Whether move is a capture move.
Definition pgnscannertokens.h:184
Square dstSq
Destination square of the move.
Definition pgnscannertokens.h:187
SquareSet srcMask
Allowed source squares per the move specification.
Definition pgnscannertokens.h:181
PgnResult result
Game result.
Definition pgnscannertokens.h:210
PgnScannerTokenInfo_MOVENUM moveNum
Definition pgnscannertokens.h:228
PgnScannerTokenInfo_PIECE_MOVE pieceMove
Definition pgnscannertokens.h:227
PgnScannerTokenInfo_RESULT result
Definition pgnscannertokens.h:230
PgnScannerTokenInfo_NAG nag
Definition pgnscannertokens.h:229
PgnScannerTokenInfo_PAWN_MOVE pawnMove
Definition pgnscannertokens.h:226