前一篇JSON性能测试对比Fastjson2和DslJson,有评论提到和simdjson-java对比,那就测试走起.
先说下simdjson-java这个库:之前也听说过,但由于早前官网测试用的文本非常离谱,而且依赖JDK18+,就没怎么关注。现在只要搜索simdjson(C++),看到最多的宣传就是每秒千兆字节级的解析速度,甚至有超越同类C++库25倍的说法。不确定simdjson和simdjson-java的性能差异有多大,下面只和simdjson-java做对比测试。
测试环境: OpenJDK 21 + Windows 10 + i5,单线程
jvm参数: -Xms3g -Xmx3g --add-opens=java.base/java.time=ALL-UNNAMED --add-modules=jdk.incubator.vector
maven 依赖截止当前最新版本
<!-- simdjson-java: one of the two JSON parsers being benchmarked -->
<dependency>
<groupId>org.simdjson</groupId>
<artifactId>simdjson-java</artifactId>
<version>0.3.0</version>
</dependency>
<!-- wast: provides the io.github.wycst.wast.json (WastJson) parser being benchmarked -->
<dependency>
<groupId>io.github.wycst</groupId>
<artifactId>wast</artifactId>
<version>0.0.19</version>
</dependency>
<!-- JMH core: @Benchmark annotation, Runner and OptionsBuilder used by the test classes -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>1.36</version>
</dependency>
<!-- JMH annotation processor; kept at the same version as jmh-core -->
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>1.36</version>
</dependency>
场景测试一: 测试数据摘自simdjson-java官方(twitter.json内容已经相对正常多了),数据大小: 631kb
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import io.github.wycst.wast.common.utils.IOUtils;
import io.github.wycst.wast.json.JSONNode;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;
import org.simdjson.JsonValue;
import org.simdjson.SimdJsonParser;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
/**
 * JMH throughput benchmark that compares WastJson and simdjson-java on the
 * {@code twitter.json} sample document.
 *
 * <p>Every benchmark performs the same task: find the users whose
 * {@code default_profile} flag is {@code true}, collect their
 * {@code screen_name} values into a set, and return the size of that set.
 * Returning the size keeps the result observable so the work cannot be
 * discarded as dead code.
 *
 * <p>Created by wangyc on 2024/11/24 15:46.
 */
public class TwitterJmhTest {

    /** Classpath location of the JSON document used as benchmark input. */
    private static final String TWITTER_JSON_RESOURCE = "/data/json/twitter.json";

    /** Parser instance shared by all simdjson-java benchmarks. */
    final static SimdJsonParser simdJsonParser = new SimdJsonParser();

    /** Raw bytes of the input document, loaded once when the class is initialized. */
    final static byte[] buffer = loadTwitterJson();

    static {
        // Size is reported in whole kilobytes (integer division by 1024).
        System.out.println("data size: " + (buffer.length >> 10) + "kb");
    }

    /**
     * Reads the benchmark input from the classpath.
     *
     * <p>The stream is opened in a try-with-resources statement so it is closed
     * even if reading fails, and a missing resource is reported with its path
     * instead of surfacing as an unexplained failure inside the reader.
     *
     * @return the complete content of the resource
     * @throws IllegalStateException if the resource is not on the classpath
     * @throws RuntimeException if reading the resource fails
     */
    private static byte[] loadTwitterJson() {
        // 'var' keeps the block free of an extra java.io import.
        try (var in = TwitterJmhTest.class.getResourceAsStream(TWITTER_JSON_RESOURCE)) {
            if (in == null) {
                throw new IllegalStateException("classpath resource not found: " + TWITTER_JSON_RESOURCE);
            }
            return IOUtils.readBytes(in);
        } catch (RuntimeException e) {
            throw e;
        } catch (Exception e) {
            throw new RuntimeException("failed to read " + TWITTER_JSON_RESOURCE, e);
        }
    }

    // The Fastjson2 variants below are intentionally disabled: this run only
    // compares WastJson with simdjson-java.
    // @Benchmark
    // public int parseAndSelect_Fastjson2() {
    // JSONObject jsonObject = (JSONObject) JSON.parse(buffer);
    // Set<String> defaultUsers = new HashSet<>();
    // Iterator<Object> tweets = jsonObject.getJSONArray("statuses").iterator();
    // while (tweets.hasNext()) {
    // JSONObject tweet = (JSONObject) tweets.next();
    // JSONObject user = (JSONObject) tweet.get("user");
    // if (user.getBoolean("default_profile")) {
    // defaultUsers.add(user.getString("screen_name"));
    // }
    // }
    // return defaultUsers.size();
    // }
    //
    // @Benchmark
    // public int schemaBasedParseAndSelect_Fastjson2() {
    // Set<String> defaultUsers = new HashSet<>();
    // SimdJsonTwitter twitter = JSON.parseObject(buffer, SimdJsonTwitter.class);
    // for (SimdJsonStatus status : twitter.statuses) {
    // SimdJsonUser user = status.user;
    // if (user.default_profile) {
    // defaultUsers.add(user.screen_name);
    // }
    // }
    // return defaultUsers.size();
    // }

    /**
     * WastJson, schema-less: collects the nodes matched by the path
     * {@code /statuses/*}{@code /user} and reads the two fields from each node.
     *
     * @return number of distinct screen names of users with {@code default_profile == true}
     */
    @Benchmark
    public int parseAndSelect_WastJson() {
        List<JSONNode> users = JSONNode.collect(buffer, "/statuses/*/user");
        Set<String> defaultUsers = new HashSet<>();
        for (JSONNode user : users) {
            if (user.getChildValue("default_profile", boolean.class)) {
                defaultUsers.add(user.getChildValue("screen_name", String.class));
            }
        }
        return defaultUsers.size();
    }

    /**
     * WastJson, schema-based: binds the document to {@link SimdJsonTwitter} and
     * walks the resulting records.
     *
     * @return number of distinct screen names of users with {@code default_profile == true}
     */
    @Benchmark
    public int schemaBasedParseAndSelect_WastJson() {
        Set<String> defaultUsers = new HashSet<>();
        SimdJsonTwitter twitter = io.github.wycst.wast.json.JSON.parseObject(buffer, SimdJsonTwitter.class);
        for (SimdJsonStatus status : twitter.statuses) {
            SimdJsonUser user = status.user;
            if (user.default_profile) {
                defaultUsers.add(user.screen_name);
            }
        }
        return defaultUsers.size();
    }

    /**
     * simdjson-java, schema-less: parses into a {@link JsonValue} and iterates
     * the {@code statuses} array.
     *
     * @return number of distinct screen names of users with {@code default_profile == true}
     */
    @Benchmark
    public int parseAndSelect_SimdjsonJava() {
        JsonValue simdJsonValue = simdJsonParser.parse(buffer, buffer.length);
        Set<String> defaultUsers = new HashSet<>();
        Iterator<JsonValue> tweets = simdJsonValue.get("statuses").arrayIterator();
        while (tweets.hasNext()) {
            JsonValue tweet = tweets.next();
            JsonValue user = tweet.get("user");
            if (user.get("default_profile").asBoolean()) {
                defaultUsers.add(user.get("screen_name").asString());
            }
        }
        return defaultUsers.size();
    }

    /**
     * simdjson-java, schema-based: binds the document to {@link SimdJsonTwitter}
     * and walks the resulting records.
     *
     * @return number of distinct screen names of users with {@code default_profile == true}
     */
    @Benchmark
    public int schemaBasedParseAndSelect_SimdjsonJava() {
        Set<String> defaultUsers = new HashSet<>();
        SimdJsonTwitter twitter = simdJsonParser.parse(buffer, buffer.length, SimdJsonTwitter.class);
        for (SimdJsonStatus status : twitter.statuses()) {
            SimdJsonUser user = status.user();
            if (user.default_profile()) {
                defaultUsers.add(user.screen_name());
            }
        }
        return defaultUsers.size();
    }

    // Component names deliberately use the snake_case keys found in the JSON
    // document rather than Java naming conventions.

    /** The two user fields the benchmarks read. */
    record SimdJsonUser(boolean default_profile, String screen_name) {
    }

    /** A single entry of the {@code statuses} array; only {@code user} is bound. */
    record SimdJsonStatus(SimdJsonUser user) {
    }

    /** Root of the document; only the {@code statuses} array is bound. */
    record SimdJsonTwitter(List<SimdJsonStatus> statuses) {
    }

    /**
     * Runs every benchmark of this class in throughput mode with one fork and
     * one thread, using a single 3-second warmup iteration and 3-second
     * measurement iterations.
     *
     * @param args unused
     * @throws RunnerException if JMH fails to run the benchmarks
     */
    public static void main(String[] args) throws RunnerException {
        Options options = new OptionsBuilder()
                .include(TwitterJmhTest.class.getName())
                .mode(Mode.Throughput)
                .warmupIterations(1)
                .warmupTime(TimeValue.seconds(3))
                .measurementTime(TimeValue.seconds(3))
                .forks(1)
                .threads(1)
                .build();
        new Runner(options).run();
    }
}
测试结果(每秒运行次数)
Benchmark Mode Cnt Score Error Units
TwitterJmhTest.parseAndSelect_SimdjsonJava thrpt 5 1370.652 ± 37.171 ops/s
TwitterJmhTest.parseAndSelect_WastJson thrpt 5 1997.455 ± 275.386 ops/s
TwitterJmhTest.schemaBasedParseAndSelect_SimdjsonJava thrpt 5 2092.163 ± 98.596 ops/s
TwitterJmhTest.schemaBasedParseAndSelect_WastJson thrpt 5 2654.975 ± 102.240 ops/s
parseAndSelect: WastJson 大约是 SimdjsonJava 的1.45倍(解析速度: wast 1.23GB/s, simdjson 0.84GB/s)
schemaBasedParseAndSelect: WastJson 大约是 SimdjsonJava 的1.26倍(解析速度: wast 1.6GB/s, simdjson 1.28GB/s)
以上是在simdjson-java官方提供数据下,WastJson都能完胜.
场景测试二: 全部为ascii编码的字节数组,大小1464kb
测试代码
package com.jmh.test.simdjson;
import io.github.wycst.wast.json.JSON;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openjdk.jmh.runner.options.TimeValue;
import org.simdjson.JsonValue;
import org.simdjson.SimdJsonParser;
import java.util.HashMap;
import java.util.Map;
/**
 * JMH throughput benchmark that compares WastJson and simdjson-java on a JSON
 * object whose values are long strings made only of the character {@code 'a'}.
 *
 * <p>The input is one object with the keys {@code "a"} to {@code "o"}; each
 * value is a string of {@link #VALUE_LENGTH} characters. Both benchmarks parse
 * the document and return the sum of the lengths of all values.
 *
 * <p>Created by wangyc on 2024/11/27.
 */
public class AsciiSourceJmhTest {

    /** Number of characters in every string value of the generated document. */
    private static final int VALUE_LENGTH = 100000;

    /**
     * Keys of the generated document. The same array drives both the generation
     * and the two benchmarks, so they always agree on the set of keys.
     * Declared before {@link #buffer} because the buffer initializer reads it.
     */
    private static final String[] KEYS = {
        "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o"
    };

    /** Serialized benchmark input, generated once when the class is initialized. */
    final static byte[] buffer = buildAsciiJson();

    /** Parser instance used by the simdjson-java benchmark. */
    final static SimdJsonParser simdJsonParser = new SimdJsonParser();

    static {
        // Size is reported in whole kilobytes (integer division by 1024).
        System.out.println("data size: " + (buffer.length >> 10) + "kb");
    }

    /**
     * Builds the input document and serializes it with WastJson.
     *
     * @return the JSON bytes of an object mapping every key in {@link #KEYS} to the same long string
     * @throws RuntimeException if serialization fails
     */
    private static byte[] buildAsciiJson() {
        try {
            // NOTE(review): the value is kept as a StringBuilder, as in the original
            // test; a plain String would presumably serialize identically - confirm
            // before changing it.
            StringBuilder value = new StringBuilder("a".repeat(VALUE_LENGTH));
            Map<String, Object> map = new HashMap<>();
            for (String key : KEYS) {
                map.put(key, value);
            }
            return JSON.toJsonBytes(map);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Parses the document with WastJson and sums the string length of every value.
     *
     * @return total number of characters over all values
     */
    @Benchmark
    public long ascii_WastJson() {
        long result = 0;
        Map<?, ?> map = (Map<?, ?>) JSON.parse(buffer);
        for (String key : KEYS) {
            result += map.get(key).toString().length();
        }
        return result;
    }

    /**
     * Parses the document with simdjson-java and sums the string length of every value.
     *
     * @return total number of characters over all values
     */
    @Benchmark
    public long ascii_SimdjsonJava() {
        long result = 0;
        JsonValue simdJsonValue = simdJsonParser.parse(buffer, buffer.length);
        for (String key : KEYS) {
            result += simdJsonValue.get(key).asString().length();
        }
        return result;
    }

    /**
     * Runs every benchmark of this class in throughput mode with one fork and
     * one thread, using a single 3-second warmup iteration and 3-second
     * measurement iterations.
     *
     * @param args unused
     * @throws RunnerException if JMH fails to run the benchmarks
     */
    public static void main(String[] args) throws RunnerException {
        Options options = new OptionsBuilder()
                .include(AsciiSourceJmhTest.class.getName())
                .mode(Mode.Throughput)
                .warmupIterations(1)
                .warmupTime(TimeValue.seconds(3))
                .measurementTime(TimeValue.seconds(3))
                .forks(1)
                .threads(1)
                .build();
        new Runner(options).run();
    }
}
测试结果(每秒运行次数)
Benchmark Mode Cnt Score Error Units
AsciiSourceJmhTest.ascii_SimdjsonJava thrpt 5 1231.578 ± 682.959 ops/s
AsciiSourceJmhTest.ascii_WastJson thrpt 5 4212.839 ± 2349.629 ops/s
该场景下WastJson大约是simdjson-java的3.4倍(解析速度: wast 6.16GB/s, simdjson 1.8GB/s);不过本场景两者的误差范围(Error)都超过了各自得分的一半,该倍数仅供参考。
以上测试均为个人电脑环境下面的测试,感兴趣的也可以下载代码测试,如果有专业人士能测试不同平台的性能差异万分感谢!
测试代码仓库: https://gitee.com/xiaoch0209/wast-jmh-test/tree/main-openjdk23/ (以上测试代码和测试结果都已上传)