"Writing an Interpreter in Go" in Zig, part 4 🔗

July 15, 2024

This post is part of the "Writing an Interpreter in Go" in Zig series.

A small change before proceeding with chapter 2 and parsing.

Using Zig’s optional type

In the Go version, and in our Zig version so far, the type of the current lookahead character in the lexer is an unsigned 8-bit integer (byte in Go, u8 in Zig). To represent no more input or EOF, we’ve been storing a 0 in that field.

This is fine since we know how to interpret that and it’s not exposed outside as an API.

But the sense we’re trying to encode is that either there is a character/byte from the input, or there is not. Zig allows any type to be optional by prepending it with a ?. For example, ?u8 is an optional u8.

It’s pretty simple to update our lexer to use optionals. We test for null in the appropriate places, and Zig also has a convenient syntax where if expressions can capture the non-optional value as a variable for the consequent block.

--- a/src/Lexer.zig
+++ b/src/Lexer.zig
@@ -7,7 +7,7 @@
 input: []const u8,
 position: usize, // current position in input (current char)
 readPosition: usize, // current reading position in input (after current char)
-ch: u8,
+ch: ?u8,
 
 const Self = @This();
 
@@ -16,7 +16,7 @@
         .input = input,
         .position = 0,
         .readPosition = 0,
-        .ch = 0,
+        .ch = null,
     };
     lexer.readChar();
     return lexer;
@@ -28,62 +28,64 @@
 
 pub fn next(self: *Self) Token {
     self.skipWhitespace();
-    const token = switch (self.ch) {
-        '=' => blk: {
-            if (self.peekChar() == '=') {
-                self.readChar();
-                break :blk Token{ .token_type = .eq, .literal = "==" };
-            }
-            break :blk newToken(.assign, "=");
-        },
-        '+' => newToken(.plus, "+"),
-        '-' => newToken(.minus, "-"),
-        '!' => blk: {
-            if (self.peekChar() == '=') {
-                self.readChar();
-                break :blk Token{ .token_type = .not_eq, .literal = "!=" };
-            }
-            break :blk newToken(.bang, "!");
-        },
-        '/' => newToken(.slash, "/"),
-        '*' => newToken(.asterisk, "*"),
-        '<' => newToken(.lt, "<"),
-        '>' => newToken(.gt, ">"),
-        ';' => newToken(.semicolon, ";"),
-        '(' => newToken(.lparen, "("),
-        ')' => newToken(.rparen, ")"),
-        '{' => newToken(.lbrace, "{"),
-        '}' => newToken(.rbrace, "}"),
-        ',' => newToken(.comma, ","),
-        0 => newToken(.eof, ""),
-        else => blk: {
-            if (ascii.isAlphabetic(self.ch) or self.ch == '_') {
-                var token: Token = undefined;
-                token.literal = self.readIdentifier();
-                token.token_type = lookupIdent(token.literal);
-                return token;
-            } else if (ascii.isDigit(self.ch)) {
-                var token: Token = undefined;
-                token.literal = self.readNumber();
-                token.token_type = .int;
-                return token;
-            }
-            break :blk newToken(.illegal, "");
-        },
-    };
-    self.readChar();
-    return token;
+    if (self.ch) |ch| {
+        const token = switch (ch) {
+            '=' => blk: {
+                if (self.peekChar() == '=') {
+                    self.readChar();
+                    break :blk Token{ .token_type = .eq, .literal = "==" };
+                }
+                break :blk newToken(.assign, "=");
+            },
+            '+' => newToken(.plus, "+"),
+            '-' => newToken(.minus, "-"),
+            '!' => blk: {
+                if (self.peekChar() == '=') {
+                    self.readChar();
+                    break :blk Token{ .token_type = .not_eq, .literal = "!=" };
+                }
+                break :blk newToken(.bang, "!");
+            },
+            '/' => newToken(.slash, "/"),
+            '*' => newToken(.asterisk, "*"),
+            '<' => newToken(.lt, "<"),
+            '>' => newToken(.gt, ">"),
+            ';' => newToken(.semicolon, ";"),
+            '(' => newToken(.lparen, "("),
+            ')' => newToken(.rparen, ")"),
+            '{' => newToken(.lbrace, "{"),
+            '}' => newToken(.rbrace, "}"),
+            ',' => newToken(.comma, ","),
+            else => blk: {
+                if (ascii.isAlphabetic(ch) or ch == '_') {
+                    var token: Token = undefined;
+                    token.literal = self.readIdentifier();
+                    token.token_type = lookupIdent(token.literal);
+                    return token;
+                } else if (ascii.isDigit(ch)) {
+                    var token: Token = undefined;
+                    token.literal = self.readNumber();
+                    token.token_type = .int;
+                    return token;
+                }
+                break :blk newToken(.illegal, "");
+            },
+        };
+        self.readChar();
+        return token;
+    }
+    return newToken(.eof, "");
 }
 
 fn skipWhitespace(self: *Self) void {
-    while (ascii.isWhitespace(self.ch)) {
+    while (self.ch != null and ascii.isWhitespace(self.ch.?)) {
         self.readChar();
     }
 }
 
 fn readIdentifier(self: *Self) []const u8 {
     const p = self.position;
-    while (ascii.isAlphabetic(self.ch) or self.ch == '_') {
+    while (self.ch != null and (ascii.isAlphabetic(self.ch.?) or self.ch.? == '_')) { // parens needed: `and` binds tighter than `or`, so the null check must guard both tests
         self.readChar();
     }
     return self.input[p..self.position];
@@ -91,7 +93,7 @@
 
 fn readNumber(self: *Self) []const u8 {
     const p = self.position;
-    while (ascii.isDigit(self.ch)) {
+    while (self.ch != null and ascii.isDigit(self.ch.?)) {
         self.readChar();
     }
     return self.input[p..self.position];
@@ -99,7 +101,7 @@
 
 fn readChar(self: *Self) void {
     if (self.readPosition >= self.input.len) {
-        self.ch = 0;
+        self.ch = null;
     } else {
         self.ch = self.input[self.readPosition];
     }
@@ -107,9 +109,9 @@
     self.readPosition += 1;
 }
 
-fn peekChar(self: *Self) u8 {
+fn peekChar(self: *Self) ?u8 {
     if (self.readPosition >= self.input.len) {
-        return 0;
+        return null;
     }
     return self.input[self.readPosition];
 }

There’s no clear win here using an optional type other than not having a sentinel value we need to know how to interpret.

This series

"Writing an Interpreter in Go" in Zig, part 1
"Writing an Interpreter in Go" in Zig, part 2
"Writing an Interpreter in Go" in Zig, part 3
"Writing an Interpreter in Go" in Zig, part 4
"Writing an Interpreter in Go" in Zig, part 5
"Writing an Interpreter in Go" in Zig, part 6