107 lines
3.0 KiB
Go
107 lines
3.0 KiB
Go
package tokenizer
|
|
|
|
import "regexp"
|
|
|
|
func (t *Tokenizer) initializeLanguages() {
|
|
t.addGoLanguage()
|
|
t.addJavaScriptLanguage()
|
|
t.addMarkdownLanguage()
|
|
t.addTextLanguage()
|
|
}
|
|
|
|
func (t *Tokenizer) addGoLanguage() {
|
|
t.languages["go"] = &Language{
|
|
Name: "go",
|
|
Rules: []Rule{
|
|
// Comments
|
|
{regexp.MustCompile(`//.*$`), "comment", 100},
|
|
{regexp.MustCompile(`/\*[\s\S]*?\*/`), "comment", 100},
|
|
|
|
// Strings
|
|
{regexp.MustCompile(`"(?:[^"\\]|\\.)*"`), "string", 90},
|
|
{regexp.MustCompile("`[^`]*`"), "string", 90},
|
|
|
|
// Keywords
|
|
{regexp.MustCompile(`\b(package|import|func|var|const|type|struct|interface|if|else|for|range|switch|case|default|return|break|continue|go|defer|chan|select)\b`), "keyword", 80},
|
|
|
|
// Types
|
|
{regexp.MustCompile(`\b(int|int8|int16|int32|int64|uint|uint8|uint16|uint32|uint64|float32|float64|string|bool|byte|rune|error)\b`), "type", 70},
|
|
|
|
// Numbers
|
|
{regexp.MustCompile(`\b\d+(\.\d+)?\b`), "number", 60},
|
|
|
|
// Functions
|
|
{regexp.MustCompile(`\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(`), "function", 50},
|
|
|
|
// Default
|
|
{regexp.MustCompile(`[a-zA-Z_][a-zA-Z0-9_]*`), "identifier", 10},
|
|
},
|
|
}
|
|
}
|
|
|
|
func (t *Tokenizer) addMarkdownLanguage() {
|
|
t.languages["markdown"] = &Language{
|
|
Name: "markdown",
|
|
Rules: []Rule{
|
|
// Headers
|
|
{regexp.MustCompile(`^#{1,6}\s+.*$`), "header", 100},
|
|
|
|
// Code blocks
|
|
{regexp.MustCompile("```[\\s\\S]*?```"), "code-block", 95},
|
|
{regexp.MustCompile("`[^`]+`"), "code-inline", 90},
|
|
|
|
// Bold/Italic
|
|
{regexp.MustCompile(`\*\*[^*]+\*\*`), "bold", 85},
|
|
{regexp.MustCompile(`\*[^*]+\*`), "italic", 80},
|
|
|
|
// Links
|
|
{regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`), "link", 75},
|
|
|
|
// Lists
|
|
{regexp.MustCompile(`^\s*[-*+]\s+`), "list-marker", 70},
|
|
{regexp.MustCompile(`^\s*\d+\.\s+`), "list-marker", 70},
|
|
},
|
|
}
|
|
}
|
|
|
|
func (t *Tokenizer) addJavaScriptLanguage() {
|
|
t.languages["javascript"] = &Language{
|
|
Name: "javascript",
|
|
Rules: []Rule{
|
|
// Comments
|
|
{regexp.MustCompile(`//.*$`), "comment", 100},
|
|
{regexp.MustCompile(`/\*[\s\S]*?\*/`), "comment", 100},
|
|
|
|
// Strings
|
|
{regexp.MustCompile(`"(?:[^"\\]|\\.)*"`), "string", 90},
|
|
{regexp.MustCompile(`'(?:[^'\\]|\\.)*'`), "string", 90},
|
|
{regexp.MustCompile("`(?:[^`\\]|\\.)*`"), "string", 90},
|
|
|
|
// Keywords
|
|
{regexp.MustCompile(`\b(const|let|var|function|return|if|else|for|while|do|switch|case|default|break|continue|try|catch|finally|throw|new|this|class|extends|import|export|from|async|await)\b`), "keyword", 80},
|
|
|
|
// Types/Built-ins
|
|
{regexp.MustCompile(`\b(Object|Array|String|Number|Boolean|Date|RegExp|Error|Promise)\b`), "type", 70},
|
|
|
|
// Numbers
|
|
{regexp.MustCompile(`\b\d+(\.\d+)?\b`), "number", 60},
|
|
|
|
// Functions
|
|
{regexp.MustCompile(`\b([a-zA-Z_$][a-zA-Z0-9_$]*)\s*\(`), "function", 50},
|
|
|
|
// Default
|
|
{regexp.MustCompile(`[a-zA-Z_$][a-zA-Z0-9_$]*`), "identifier", 10},
|
|
},
|
|
}
|
|
}
|
|
|
|
func (t *Tokenizer) addTextLanguage() {
|
|
t.languages["text"] = &Language{
|
|
Name: "text",
|
|
Rules: []Rule{
|
|
// Just return everything as text
|
|
{regexp.MustCompile(`.+`), "text", 1},
|
|
},
|
|
}
|
|
}
|