以下是一个简单的 SQL SELECT 解析器的 Java 实现,支持单表查询和基本条件过滤。代码包含词法分析和语法分析模块,并支持以下语法:
SELECT column1, column2 FROM table WHERE column3 = 5
完整代码
1. Token 类型定义 (TokenType.java)
/**
 * The kinds of token that can appear in the supported SELECT grammar.
 *
 * <p>Declaration order is kept stable because it determines each constant's ordinal.
 */
public enum TokenType {
    // Keywords
    SELECT,
    FROM,
    WHERE,

    // Names and punctuation
    IDENTIFIER,
    COMMA,
    STAR,
    EQUALS,

    // Literals and the end-of-input marker
    STRING,
    NUMBER,
    EOF
}
2. Token 类 (Token.java)
/**
 * A single lexical token: a {@link TokenType} together with the text it carries.
 *
 * <p>Both fields are final, so an instance cannot be modified after construction.
 */
public class Token {
    /** Category of this token. */
    public final TokenType type;

    /** Text associated with this token. */
    public final String value;

    /**
     * Creates a token.
     *
     * @param type  category of the token
     * @param value text associated with the token
     */
    public Token(TokenType type, String value) {
        this.type = type;
        this.value = value;
    }

    /**
     * Renders the token as {@code (TYPE, value)}.
     *
     * @return the type and value, comma-separated and wrapped in parentheses
     */
    @Override
    public String toString() {
        return "(" + type + ", " + value + ")";
    }
}
3. 词法分析器 (Lexer.java)
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Lexer {
private final String input;
private int pos = 0;
private static final Pattern TOKEN_PATTERN = Pattern.compile("(?i)(SELECT|FROM|WHERE)" + // 关键字 (不区分大小写)"|([a-zA-Z_][a-zA-Z0-9_]*)" + // 标识符"|(\\*)" + // 星号"|(,)" + // 逗号"|(=)" + // 等于号"|('([^']*)')" + // 字符串字面量"|(\\d+)" + // 数字"|(\\s+)" // 空白字符 (跳过)
);public Lexer(String input) {this.input = input;
}public ArrayList<Token> tokenize() {ArrayList<Token> tokens = new ArrayList<>();Matcher matcher = TOKEN_PATTERN.matcher(input);while (pos < input.length()) {if (!matcher.find(pos)) {throw new RuntimeException("Invalid token at position: " + pos);}// 跳过空白字符if (matcher.group(7) != null) {pos = matcher.end();continue;}// 匹配其他 Tokenfor (int i = 1; i <= matcher.groupCount(); i++) {if (matcher.group(i) != null) {TokenType type = null;String value = matcher.group(i);switch (i) {case 1: // 关键字type = TokenType.valueOf(value.toUpperCase());break;case 2: // 标识符type = TokenType.IDENTIFIER;break;case 3: // *type = TokenType.STAR;break;case 4: // ,type = TokenType.COMMA;break;case 5: // =type = TokenType.EQUALS;break;case 6: // 字符串 (带引号)type = TokenType.STRING;value = matcher.group(7); // 去掉引号break;case 8: // 数字type = TokenType.NUMBER;break;}if(type!=null){tokens.add(new Token(type, value));}pos = matcher.end();break;}}}tokens.add(new Token(TokenType.EOF, ""));return tokens;
}
}
4. AST 结构类 (SelectQuery.java)
import java.util.List;
/**
 * Result of parsing a single-table SELECT statement: the selected columns, the table
 * name and an optional WHERE condition.
 */
public class SelectQuery {
    /** Selected column names in source order; the parser stores {@code "*"} for SELECT *. */
    public List<String> columns;

    /** Name of the table in the FROM clause. */
    public String table;

    /** The WHERE condition, or {@code null} when none has been set. */
    public Condition whereCondition;

    /** A single {@code column <operator> value} comparison. */
    public static class Condition {
        /** Column name on the left-hand side. */
        public String column;

        /** Comparison operator text. */
        public String operator;

        /** Literal on the right-hand side, kept as text. */
        public String value;
    }
}
5. 语法解析器 (Parser.java)
import java.util.ArrayList;
import java.util.List;
public class Parser {
private final List tokens;
private int pos = 0;
public Parser(List<Token> tokens) {this.tokens = tokens;
}public SelectQuery parse() {SelectQuery query = new SelectQuery();parseSelect(query);parseFrom(query);parseWhere(query);return query;
}private void parseSelect(SelectQuery query) {consume(TokenType.SELECT);query.columns = new ArrayList<>();if (peek().type == TokenType.STAR) {consume(TokenType.STAR);query.columns.add("*");} else {do {query.columns.add(consume(TokenType.IDENTIFIER).value);} while (match(TokenType.COMMA));}
}private void parseFrom(SelectQuery query) {consume(TokenType.FROM);query.table = consume(TokenType.IDENTIFIER).value;
}private void parseWhere(SelectQuery query) {if (match(TokenType.WHERE)) {SelectQuery.Condition condition = new SelectQuery.Condition();condition.column = consume(TokenType.IDENTIFIER).value;consume(TokenType.EQUALS);Token valueToken = peek();if (valueToken.type == TokenType.STRING || valueToken.type == TokenType.NUMBER) {condition.value = valueToken.value;advance();} else {throw new RuntimeException("Expected string or number");}query.whereCondition = condition;}
}private Token consume(TokenType expected) {Token token = peek();if (token.type != expected) {throw new RuntimeException("Expected " + expected + ", found " + token.type);}advance();return token;
}private boolean match(TokenType type) {if (peek().type == type) {advance();return true;}return false;
}private Token peek() {return tokens.get(pos);
}private void advance() {pos++;
}
}
6. 测试主类 (Main.java)
import java.util.List;
public class Main {
public static void main(String[] args) {
String sql = “SELECT id, name FROM users WHERE age = 25”;
Lexer lexer = new Lexer(sql);
List tokens = lexer.tokenize();
Parser parser = new Parser(tokens);
SelectQuery query = parser.parse();
System.out.println("Columns: " + query.columns);System.out.println("Table: " + query.table);if (query.whereCondition != null) {System.out.println("WHERE " + query.whereCondition.column + " = " + query.whereCondition.value);}
}
}
代码说明
- 词法分析器 (Lexer)
  - 使用正则表达式匹配 SQL 关键字、标识符、数字、字符串等 Token。
  - 跳过空白字符,返回 Token 列表。
- 语法解析器 (Parser)
  - 递归下降解析器,依次解析 SELECT、FROM、WHERE 子句。
  - 构建 SelectQuery 对象存储解析结果。
- AST 结构 (SelectQuery)
  - 保存查询的列、表名和过滤条件。
- 测试示例 (Main)
  - 输入 SQL 语句,输出解析后的结构。
运行结果
Columns: [id, name]
Table: users
WHERE age = 25
支持特性
- 单表 SELECT 查询
- 列名列表或 *
- 简单的 WHERE 条件(仅支持 = 和字符串/数字值)
可根据需要扩展 WHERE 条件(如 >, <, AND/OR)和更复杂的数据类型。