DLR Visual Basic Scanner

In my research into how to develop a language, I came across several different compilers on Codeplex. In developing my own compiler I am using the following design:

image

In order to keep things simple I am following the grammar of the Visual Basic .NET language. I have started development and have managed to get the scanner written (source code below); I based it upon Joel Pobar’s Nua. I am still fine-tuning it and learning more about the proper way to develop a compiler.

 

Imports Microsoft.Scripting
Imports Microsoft.Scripting.Runtime

''' <summary>
''' A lexical analyzer for GFN. It produces a stream of lexical tokens.
''' </summary>
''' <remarks>
''' The scanner keeps a single token of look-ahead in <c>_token</c>: <see cref="Peek"/>
''' exposes it without consuming it and <see cref="Read"/> consumes it.
''' NOTE(review): "-" is not recognised as punctuation and a doubled quote ("")
''' inside a string literal is not treated as an escaped quote; both are part of
''' the Visual Basic grammar and probably need to be added.
''' </remarks>
Public Class Scanner
    ' Buffer used to extract and process tokens
    Private _buffer As TokenizerBuffer
    ' Used to track errors
    Private _errors As ErrorSink
    ' Source code to be read.
    Private _source As SourceUnit
    ' Look-ahead token; Nothing until the first token has been scanned.
    Private _token As Token

    ''' <summary>
    ''' Fetches the current token without advancing the stream position
    ''' </summary>
    ''' <returns>The current token.</returns>
    Public ReadOnly Property Peek() As Token
        Get
            ' Lazily scan the first token (or rescan if the look-ahead is a "None" placeholder).
            If (Me._token Is Nothing OrElse Me._token.Type = TokenType.None) Then
                Me._token = Me.ReadToken()
            End If

            Return Me._token
        End Get
    End Property ' Peek

    ''' <summary>
    ''' Constructs a scanner for the specified source unit.
    ''' </summary>
    ''' <param name="errors">Sink that receives the errors reported while scanning.</param>
    ''' <param name="source">Represents the source code to be processed.</param>
    ''' <exception cref="ArgumentNullException"><paramref name="source"/> is Nothing.</exception>
    Public Sub New(ByVal errors As ErrorSink, ByVal source As SourceUnit)
        ' Validate before touching any state, and report the real parameter name.
        If (source Is Nothing) Then Throw New ArgumentNullException("source")

        Me._errors = errors
        Me._source = source

        ' multiEolns - Whether to allow multiple forms of EOLN. If false only '\n' is treated as a line separator, otherwise '\n', '\r\n' and '\r' are treated as separators
        Me._buffer = New TokenizerBuffer(Me._source.GetReader(), SourceLocation.MinValue, 1024, True)
    End Sub ' New

    ''' <summary>
    ''' Reads the next token available in the stream.
    ''' </summary>
    ''' <returns>Next token available in the stream.</returns>
    Public Function Read() As Token
        ' Going through Peek primes the look-ahead on first use, so Read() is safe
        ' to call before Peek has ever been evaluated (the field starts as Nothing).
        Dim current As Token = Me.Peek

        ' Advance the look-ahead past the token being handed out.
        Me._token = Me.ReadToken()
        Return current
    End Function ' Read

    ''' <summary>
    ''' Builds the span of the token currently delimited in the buffer.
    ''' </summary>
    ''' <returns>Span running from the buffer's token start to its token end.</returns>
    Private Function CurrentSpan() As SourceSpan
        Return New SourceSpan(Me._buffer.TokenStart, Me._buffer.TokenEnd)
    End Function ' CurrentSpan

    ''' <summary>
    ''' Read the next available token in the stream.
    ''' </summary>
    ''' <returns>Next available token.</returns>
    Private Function ReadToken() As Token
        Dim token As Token = Nothing

        ' Discard any white spaces.
        While (Me._buffer.Peek() <> -1 AndAlso Char.IsWhiteSpace(ChrW(Me._buffer.Peek())))
            Me._buffer.Read()
            ' Buffer can drop current token.
            Me._buffer.DiscardToken()
        End While

        ' Has the end of the buffer been reached?
        If (Me._buffer.Peek() = -1) Then
            Me._buffer.MarkTokenEnd(True)
            token = New EndOfStreamToken(Me.CurrentSpan())
            ' Buffer can drop current token.
            Me._buffer.DiscardToken()
            Return token
        End If

        ' Look at the first character available in the buffer without consuming it.
        Dim nchr As Char = ChrW(Me._buffer.Peek())

        If (Char.IsLetter(nchr) OrElse nchr = "_"c) Then
            Return Me.ScanKeywordOrIdentifier()
        ElseIf Char.IsDigit(nchr) Then
            Return Me.ScanNumericLiteral()
        ElseIf (nchr = """"c) Then
            Return Me.ScanStringLiteral()
        ElseIf ("=+*/^(){}".IndexOf(nchr) <> -1) Then
            Dim kind As TokenType = TokenType.None

            ' Single-character punctuation: consume it, then classify it.
            Me._buffer.Read()
            Select Case nchr
                Case "="c
                    kind = TokenType.Equals
                Case "+"c
                    kind = TokenType.Plus
                Case "*"c
                    kind = TokenType.Star
                Case "/"c
                    kind = TokenType.ForwardSlash
                Case "^"c
                    kind = TokenType.Caret
                Case "("c
                    kind = TokenType.LeftParenthesis
                Case ")"c
                    kind = TokenType.RightParenthesis
                Case "{"c
                    kind = TokenType.LeftCurlyBrace
                Case "}"c
                    kind = TokenType.RightCurlyBrace
            End Select

            Me._buffer.MarkTokenEnd(False)
            token = New PunctuatorToken(Me.CurrentSpan(), kind)
            ' Buffer can drop current token.
            Me._buffer.DiscardToken()
            Return token
        Else
            ' An invalid character has been discovered. It must be consumed,
            ' otherwise every subsequent call would see the same character and
            ' the scanner would never make progress.
            Me._buffer.Read()
            Me._buffer.MarkTokenEnd(False)
            token = New ErrorToken(SyntaxErrorType.InvalidCharacter, Me.CurrentSpan())
            ' Buffer can drop current token.
            Me._buffer.DiscardToken()
            Return token
        End If
    End Function ' ReadToken

    ''' <summary>
    ''' Scans in all the digits in the numeric literal.
    ''' </summary>
    ''' <param name="acc">Receives the digits read; receives "0" when no digit is available.</param>
    Private Sub ScanDigit(ByVal acc As Text.StringBuilder)
        If (Me._buffer.Peek() = -1 OrElse Not Char.IsDigit(ChrW(Me._buffer.Peek()))) Then
            ' Delimit the (empty) token first so the reported span is well defined.
            Me._buffer.MarkTokenEnd(False)
            Me._errors.Add(Me._source, "Expected digits", Me.CurrentSpan(), 0, Severity.FatalError)
            ' Invalid numeric constant.
            acc.Append("0")
            Return
        End If

        While (Me._buffer.Peek() <> -1 AndAlso Char.IsDigit(ChrW(Me._buffer.Peek())))
            ' ChrW (not Chr): the buffer yields UTF-16 code units, and Chr would
            ' route them through the ANSI code page.
            acc.Append(ChrW(Me._buffer.Read()))
        End While
    End Sub ' ScanDigit

    ''' <summary>
    ''' Consumes a run of letters, digits and underscores and marks the token end.
    ''' </summary>
    ''' <returns>The text consumed; the buffer's current token is left delimited but not discarded.</returns>
    Private Function ScanIdentifierText() As String
        Dim acc As New Text.StringBuilder()

        While (Me._buffer.Peek() <> -1)
            Dim nchr As Char = ChrW(Me._buffer.Peek())
            If Not (Char.IsLetterOrDigit(nchr) OrElse nchr = "_"c) Then Exit While
            acc.Append(ChrW(Me._buffer.Read()))
        End While

        Me._buffer.MarkTokenEnd(False)
        Return acc.ToString()
    End Function ' ScanIdentifierText

    ''' <summary>
    ''' Scans in the identifier.
    ''' </summary>
    ''' <returns>Token associated with the identifier.</returns>
    Private Function ScanIdentifier() As Token
        Dim name As String = Me.ScanIdentifierText()
        Dim token As Token = New IdentifierToken(name, Me.CurrentSpan(), TokenType.Identifier)

        ' Buffer can drop current token.
        Me._buffer.DiscardToken()
        Return token
    End Function ' ScanIdentifier

    ''' <summary>
    ''' Identifies the keyword or identifier.
    ''' </summary>
    ''' <returns>Token associated with the given keyword or identifier.</returns>
    Private Function ScanKeywordOrIdentifier() As Token
        ' Scan the text once and build the token while the buffer still delimits it,
        ' so the span covers the identifier itself rather than an empty range after it.
        Dim name As String = Me.ScanIdentifierText()
        ' The token type is whatever IdentifierToken.TokenTypeFromString maps the text to
        ' (presumably a keyword type or Identifier - confirm against IdentifierToken).
        Dim token As Token = New IdentifierToken(name, Me.CurrentSpan(), IdentifierToken.TokenTypeFromString(name))

        ' Buffer can drop current token.
        Me._buffer.DiscardToken()
        Return token
    End Function ' ScanKeywordOrIdentifier

    ''' <summary>
    ''' Scans in a numeric literal.
    ''' </summary>
    ''' <returns>
    ''' Token associated with the numeric literal. When the digits do not form a valid
    ''' Integer an error is reported and the token carries the value 0.
    ''' </returns>
    Private Function ScanNumericLiteral() As Token
        Dim acc As New Text.StringBuilder()
        Dim value As Integer = 0

        Me.ScanDigit(acc)
        Me._buffer.MarkTokenEnd(False)

        Dim span As SourceSpan = Me.CurrentSpan()

        ' TryParse instead of Parse: an over-long literal (or digits Integer cannot
        ' parse) is a source error to report, not an exception to throw at the caller.
        If Not Integer.TryParse(acc.ToString(), value) Then
            Me._errors.Add(Me._source, "Invalid integer literal", span, 0, Severity.FatalError)
            value = 0
        End If

        Dim token As Token = New IntegerLiteralToken(value, span)
        ' Buffer can drop current token.
        Me._buffer.DiscardToken()
        Return token
    End Function ' ScanNumericLiteral

    ''' <summary>
    ''' Scans in a string literal.
    ''' </summary>
    ''' <returns>
    ''' Token associated with the string literal, or an error token (after reporting
    ''' "Unterminated string literal") when the input ends before the closing quote.
    ''' </returns>
    Private Function ScanStringLiteral() As Token
        Dim acc As New Text.StringBuilder()
        Dim token As Token = Nothing

        ' Discard the initial quote.
        Me._buffer.Read()

        ' Read until the terminating quote is seen.
        Do
            ' Has the end of the buffer been reached?
            If (Me._buffer.Peek() = -1) Then
                ' Mark the end before building the span used by the error report.
                Me._buffer.MarkTokenEnd(False)
                Dim errorSpan As SourceSpan = Me.CurrentSpan()
                Me._errors.Add(Me._source, "Unterminated string literal", errorSpan, 0, Severity.FatalError)
                token = New ErrorToken(SyntaxErrorType.InvalidStringLiteral, errorSpan)
                ' Buffer can drop current token.
                Me._buffer.DiscardToken()
                Return token
            End If

            If (ChrW(Me._buffer.Peek()) = """"c) Then Exit Do

            acc.Append(ChrW(Me._buffer.Read()))
        Loop

        ' Discard the terminating quote.
        Me._buffer.Read()

        Me._buffer.MarkTokenEnd(False)
        token = New StringLiteralToken(acc.ToString(), Me.CurrentSpan())
        ' Buffer can drop current token.
        Me._buffer.DiscardToken()
        Return token
    End Function ' ScanStringLiteral
End Class ' Scanner
Published Sun, Nov 2 2008 23:23 by Isaiah
Filed under: ,

Comments

# Консалтинг

А если посмотреть на это с другой точки зрения то не все так гладко получается

Monday, October 05, 2009 4:24 AM by Sergio

Leave a Comment

(required) 
(required) 
(optional)
(required)