版本 | 时间 | 修改内容 |
v1 | 2024.06.13 | 新建 |
v2 | 2024.06.28 | 更新body和请求类型筛选的正则表达式内容,特殊换行符和转义符剔除 |
该工具可以将curl脚本中的header解析为kv map结构;获取url路径、请求类型;解析url参数列表;解析body请求体:form表单、raw body、kv body、xml/json/text结构体等。
使用示例
获取一个http curl脚本:
curl --location --request post 'https://cainiao-inc.com?param_1=value_1&param_2=value_2' \
--header 'cookie: user_cookie' \
--header 'content-type: application/json' \
--data-raw '{
"appname": "link",
"apiid": "test_api",
"content": {
"address": "cainiao home",
"city": "hangzhou"
}
}'
执行解析例子:

实现原理
实现原理很简单:基于java正则 + 责任链设计模式,按照curl脚本的常见语法去匹配、解析即可~
按照curl语法结构,可以将其拆分为 5 个部分:
- url路径:http://cainiao.com
- url参数列表:?param_1=value_1&param_2=value_2
- 请求方法类型:例如 post、get、delete、put……需要正则匹配 -X、--request 等标识符
- header请求头:例如 cookie、token、content-type……需要正则匹配 -H、--header 等标识符
- body请求体:可以分为 form-data/--form、data-raw、data-urlencode、-d、--data、kvbody 等。格式可能包含json、xml、文本、kv键值对,二进制流(暂不支持解析)等等。
具体实现
流程简图:

类关系图:

curlparserutil
curl解析工具类:
public class curlparserutil {
/**
 * Parses a curl command into a {@code curlentity}.
 *
 * <p>The handlers are linked behind the head returned by {@code curlhandlerchain.init()} in this
 * order: URL path, URL parameters, HTTP method, headers, body. To support more of the curl
 * syntax, append another handler to the end of the chain.
 *
 * @param curl the curl command text
 * @return the entity filled in by the handlers
 */
public static curlentity parse(string curl) {
    icurlhandler<curlentity, string> head = curlhandlerchain.init();
    icurlhandler<curlentity, string> tail = head.next(new urlpathhandler());
    tail = tail.next(new urlparamshandler());
    tail = tail.next(new httpmethodhandler());
    tail = tail.next(new headerhandler());
    tail.next(new httpbodyhandler());

    curlentity parsed = curlentity.builder().build();
    head.handle(parsed, curl);
    return parsed;
}
}
curlentity
解析后得到的curl实体类(这里分了5个部分)
@data
@builder
public class curlentity {
/**
* url路径
*/
private string url;
/**
* 请求方法类型
*/
private method method;
/**
* url参数
*/
private map<string, string> urlparams;
/**
* header参数
*/
private map<string, string> headers;
/**
* 请求体
*/
private jsonobject body;
/** HTTP request methods that the parser can report on a parsed entity. */
public enum method { get, post, put, delete }
}
icurlhandler
责任链链表结构定义:
public interface icurlhandler<r, s> {
/**
 * Registers the handler that runs after this one.
 *
 * @param handler the successor handler
 * @return a handler on which {@code next} can be called again to keep extending the chain
 */
icurlhandler<curlentity, string> next(icurlhandler<curlentity, string> handler);
/**
 * Processes the curl text and stores whatever this handler extracts on the entity.
 *
 * @param entity the entity being filled in
 * @param curl the curl command text
 */
void handle(curlentity entity, string curl);
}
curlhandlerchain
责任链载体:
public abstract class curlhandlerchain implements icurlhandler<curlentity, string> {
icurlhandler<curlentity, string> next;
/**
 * Stores {@code handler} as this link's successor and returns it, so the caller can append the
 * following link to it.
 */
@override
public icurlhandler<curlentity, string> next(icurlhandler<curlentity, string> handler) {
    next = handler;
    return handler;
}
/**
 * Performs this link's parsing step. Subclasses in this file end their implementation by calling
 * {@code nexthandle} so the rest of the chain runs.
 *
 * @param entity the entity being filled in
 * @param curl the curl command text
 */
@override
public abstract void handle(curlentity entity, string curl);
/**
 * Passes control to the successor handler; does nothing when this is the last link.
 * Intended to be called by subclasses at the end of {@code handle}.
 *
 * @param curlentity the entity being filled in
 * @param curl the curl command text
 */
protected void nexthandle(curlentity curlentity, string curl) {
    if (next == null) {
        return;
    }
    next.handle(curlentity, curl);
}
/**
 * Rejects input that is blank or does not look like a curl command.
 *
 * @param curl the curl command text
 * @throws illegalargumentexception if the text is blank or does not match
 *     {@code curlpatternconstants.curl_basic_structure_pattern}
 */
protected void validate(string curl) {
    if (stringutils.isblank(curl)) {
        throw new illegalargumentexception("curl script is empty");
    }
    // Qualified like every other pattern reference in the handlers: this class does not
    // implement curlpatternconstants, so the bare constant name is not inherited.
    boolean lookslikecurl = curlpatternconstants.curl_basic_structure_pattern.matcher(curl).find();
    if (!lookslikecurl) {
        throw new illegalargumentexception("curl script is invalid");
    }
}
/**
 * Creates the head of the chain: a link that validates the raw text, flattens it and then hands
 * it to the handlers registered behind it.
 *
 * @return the head link
 */
public static curlhandlerchain init() {
    return new curlhandlerchain() {
        @override
        public void handle(curlentity entity, string curl) {
            validate(curl);
            // Strip every backslash, newline and tab so a multi-line script (line-continuation
            // backslashes included) becomes a single line for the regex-based handlers.
            string flattened = curl.replace("\\", "").replace("\n", "").replace("\t", "");
            nexthandle(entity, flattened);
        }
    };
}
/**
 * Logging hook. The base implementation does nothing; the handlers in this file override it to
 * report what they parsed.
 *
 * @param logparams values to log
 */
public void log(object... logparams) {
// intentionally empty: subclasses provide the actual logging
}
}
urlpathhandler
url路径解析:
public class urlpathhandler extends curlhandlerchain {
/** Extracts the URL path, stores it on the entity, logs it and continues the chain. */
@override
public void handle(curlentity entity, string curl) {
    string path = parseurlpath(curl);
    entity.seturl(path);
    log(path);
    nexthandle(entity, curl);
}
/**
 * Extracts the URL without its query string from the curl text.
 *
 * @param curl the curl command text
 * @return the first http/https URL up to (not including) the {@code ?}, or {@code null} when
 *     the text contains no such URL
 */
private string parseurlpath(string curl) {
    matcher matcher = curlpatternconstants.url_path_pattern.matcher(curl);
    // url_path_pattern has exactly one capturing group and it is mandatory, so a successful
    // match always carries the path in group 1. (The previous fallback to group 3 referred to
    // a group that does not exist and would have thrown had it ever been reached.)
    return matcher.find() ? matcher.group(1) : null;
}
/**
 * Reports the parsed URL path via {@code logprinter.info}.
 *
 * @param logparams values handed to the logger together with the message template
 */
@override
public void log(object... logparams) {
logprinter.info("urlpathhandler execute: url={}", logparams);
}
}
httpmethodhandler
请求类型解析:
public class httpmethodhandler extends curlhandlerchain {
/** Determines the request method, stores it on the entity, logs it and continues the chain. */
@override
public void handle(curlentity entity, string curl) {
    curlentity.method resolved = parsemethod(curl);
    entity.setmethod(resolved);
    log(resolved);
    nexthandle(entity, curl);
}
/**
 * Determines the HTTP method of the curl command.
 *
 * <p>An explicit method flag wins. Without one, the presence of a data option means post,
 * otherwise the method is get.
 *
 * @param curl the curl command text
 * @return the resolved method, never {@code null}
 * @throws illegalargumentexception if an explicit method is matched that the enum does not declare
 */
private curlentity.method parsemethod(string curl) {
    matcher explicit = curlpatternconstants.http_method_pattern.matcher(curl);
    if (explicit.find()) {
        string requested = explicit.group(1);
        // Compare names case-insensitively instead of upper-casing and calling valueof, so the
        // lookup does not depend on how the enum constants happen to be cased.
        for (curlentity.method candidate : curlentity.method.values()) {
            if (candidate.name().equalsignorecase(requested)) {
                return candidate;
            }
        }
        throw new illegalargumentexception("unsupported http method: " + requested);
    }
    // Only consult the data-option pattern when no explicit method was given.
    boolean sendsdata = curlpatternconstants.default_http_method_pattern.matcher(curl).find();
    return sendsdata ? curlentity.method.post : curlentity.method.get;
}
/**
 * Reports the resolved request method via {@code logprinter.info}.
 *
 * @param logparams values handed to the logger together with the message template
 */
@override
public void log(object... logparams) {
logprinter.info("httpmethodhandler execute: method={}", logparams);
}
}
urlparamshandler
url参数列表解析:
public class urlparamshandler extends curlhandlerchain {
/** Parses the URL query parameters, stores them on the entity, logs them and continues the chain. */
@override
public void handle(curlentity entity, string curl) {
    map<string, string> params = parseurlparams(extracturl(curl));
    entity.seturlparams(params);
    log(params);
    nexthandle(entity, curl);
}
/**
 * Finds the full URL (query string included) in the curl text.
 *
 * @param curl the curl command text
 * @return the first match of {@code url_params_pattern}, or {@code null} when there is none
 */
private string extracturl(string curl) {
    matcher urlmatcher = curlpatternconstants.url_params_pattern.matcher(curl);
    return urlmatcher.find() ? urlmatcher.group(1) : null;
}
/**
 * Splits the query string of a URL into a key/value map.
 *
 * <p>Everything after the first {@code ?} is treated as the query, so a literal {@code ?}
 * inside a value is kept. A parameter without a value ({@code flag} or {@code flag=}) is stored
 * under its name with a {@code null} value. Empty segments such as the one produced by a
 * trailing {@code &} are skipped. Keys and values are stored as written (no URL decoding).
 *
 * @param url the full URL, may be blank
 * @return the parameters; empty when the URL is blank or has no query
 */
private map<string, string> parseurlparams(string url) {
    if (stringutils.isblank(url)) {
        return collections.emptymap();
    }
    map<string, string> urlparams = new hashmap<>();
    int querystart = url.indexof('?');
    if (querystart < 0) {
        return urlparams;
    }
    string query = url.substring(querystart + 1);
    for (string pair : query.split("&")) {
        if (pair.isempty()) {
            continue;
        }
        int idx = pair.indexof('=');
        if (idx < 0) {
            // bare flag without '='
            urlparams.put(pair, null);
            continue;
        }
        // "name=" keeps the name as the key (previously the '=' leaked into the key)
        string value = idx < pair.length() - 1 ? pair.substring(idx + 1) : null;
        urlparams.put(pair.substring(0, idx), value);
    }
    return urlparams;
}
/**
 * Reports the parsed URL parameters via {@code logprinter.info}.
 *
 * @param logparams values handed to the logger together with the message template
 */
@override
public void log(object... logparams) {
logprinter.info("urlparamshandler execute: urlparams={}", logparams);
}
}
headerhandler
http header解析:
public class headerhandler extends curlhandlerchain{
/** Parses the request headers, stores them on the entity, logs them and continues the chain. */
@override
public void handle(curlentity entity, string curl) {
    map<string, string> parsed = parseheaders(curl);
    entity.setheaders(parsed);
    log(parsed);
    nexthandle(entity, curl);
}
/**
 * Collects every header option of the curl text into a name/value map.
 *
 * <p>Each match is split at the first colon; name and value are trimmed. A later header with the
 * same name replaces an earlier one.
 *
 * @param curl the curl command text
 * @return the headers; empty when the text is blank or carries no header option
 */
private map<string, string> parseheaders(string curl) {
    if (stringutils.isblank(curl)) {
        return collections.emptymap();
    }
    map<string, string> parsed = new hashmap<>();
    matcher headermatcher = curlpatternconstants.curl_headers_pattern.matcher(curl);
    while (headermatcher.find()) {
        string[] nameandvalue = headermatcher.group(1).split(":", 2);
        if (nameandvalue.length != 2) {
            continue;
        }
        parsed.put(nameandvalue[0].trim(), nameandvalue[1].trim());
    }
    return parsed;
}
/**
 * Reports the parsed headers via {@code logprinter.info}.
 *
 * @param logparams values handed to the logger together with the message template
 */
@override
public void log(object... logparams) {
logprinter.info("headerhandler execute: headers={}", logparams);
}
}
httpbodyhandler
request body请求体解析:
- form-data/-form
- data-urlencode
- data-raw
- default/-d/--data
格式可能包含json、xml、文本、kv键值对,二进制流(暂不支持解析)等等。
public class httpbodyhandler extends curlhandlerchain {
/** Parses the request body, stores it on the entity, logs it and continues the chain. */
@override
public void handle(curlentity entity, string curl) {
    jsonobject parsedbody = parsebody(curl);
    entity.setbody(parsedbody);
    log(parsedbody);
    nexthandle(entity, curl);
}
/**
 * Parses the request body of the curl text.
 *
 * <p>The body styles are probed in a fixed order and the first one present decides the result:
 * form fields, url-encoded data, raw data, then plain data options.
 *
 * @param curl the curl command text
 * @return the parsed body; an empty object when no body option is found
 */
private jsonobject parsebody(string curl) {
    matcher form = curlpatternconstants.http_from_body_pattern.matcher(curl);
    if (form.find()) {
        return parseformbody(form);
    }

    matcher urlencoded = curlpatternconstants.http_urlencode_body_pattern.matcher(curl);
    if (urlencoded.find()) {
        return parseurlencodebody(urlencoded);
    }

    matcher raw = curlpatternconstants.http_row_body_pattern.matcher(curl);
    if (raw.find()) {
        return parserowbody(raw);
    }

    matcher plain = curlpatternconstants.default_http_body_pattern.matcher(curl);
    if (plain.find()) {
        return parsedefaultbody(plain);
    }

    return new jsonobject();
}
/**
 * Parses the payload of a plain data option.
 *
 * <p>The payload is taken from whichever alternative matched: single-quoted (group 1),
 * double-quoted (group 2) or unquoted (group 3). It is parsed as JSON when possible, otherwise
 * as {@code key=value&key=value} pairs; anything else yields an empty object.
 *
 * @param defaultmatcher a matcher of {@code default_http_body_pattern} positioned on a match
 * @return the parsed body
 */
private jsonobject parsedefaultbody(matcher defaultmatcher) {
    string singlequoted = defaultmatcher.group(1);
    string payload;
    if (singlequoted != null) {
        payload = singlequoted;
    } else {
        string doublequoted = defaultmatcher.group(2);
        payload = doublequoted != null ? doublequoted : defaultmatcher.group(3);
    }

    if (isjson(payload)) {
        return jsonobject.parseobject(payload);
    }
    // Special case such as: username=test&password=secret
    if (curlpatternconstants.default_http_body_pattern_kv.matcher(payload).matches()) {
        return parsekvbody(payload);
    }
    return new jsonobject();
}
/**
 * Converts {@code key=value&key=value} text into an object, URL-decoding keys and values as
 * UTF-8. Every pair is expected to contain {@code =}; the only caller in this class checks the
 * text against {@code default_http_body_pattern_kv} first.
 *
 * @param kvbodystr the key/value text
 * @return the decoded pairs
 */
private jsonobject parsekvbody(string kvbodystr) {
    jsonobject decoded = new jsonobject();
    string[] entries = kvbodystr.split("&");
    for (int i = 0; i < entries.length; i++) {
        string entry = entries[i];
        int eq = entry.indexof("=");
        string name = urldecoder.decode(entry.substring(0, eq), standardcharsets.utf_8);
        string content = urldecoder.decode(entry.substring(eq + 1), standardcharsets.utf_8);
        decoded.put(name, content);
    }
    return decoded;
}
/**
 * Collects all form options into one object.
 *
 * <p>Each item is split at the first {@code =}; items without one are ignored. A value starting
 * with {@code @} marks a file upload: only the text after the marker is stored, the file itself
 * is not read.
 *
 * @param formmatcher a matcher of {@code http_from_body_pattern}; it is reset here so that the
 *     match already consumed by the caller is visited again
 * @return the form fields
 */
private jsonobject parseformbody(matcher formmatcher) {
    jsonobject fields = new jsonobject();
    formmatcher.reset();
    while (formmatcher.find()) {
        string longform = formmatcher.group(1);
        string item = longform != null ? longform : formmatcher.group(2);
        string[] parts = item.split("=", 2);
        if (parts.length != 2) {
            continue;
        }
        string content = parts[1];
        fields.put(parts[0], content.startswith("@") ? content.substring(1) : content);
    }
    return fields;
}
/**
 * Collects all url-encoded data options into one object.
 *
 * <p>Each item is split at the first {@code =}; items without one are ignored. The value is
 * URL-decoded as UTF-8. A value that is not valid percent-encoding (for example a raw
 * {@code 100%}) is stored unchanged instead of aborting the whole parse.
 *
 * @param urlencodematcher a matcher of {@code http_urlencode_body_pattern}; it is reset here so
 *     that the match already consumed by the caller is visited again
 * @return the decoded key/value pairs
 */
private jsonobject parseurlencodebody(matcher urlencodematcher) {
    jsonobject urlencodedata = new jsonobject();
    urlencodematcher.reset();
    while (urlencodematcher.find()) {
        string[] keyvalue = urlencodematcher.group(1).split("=", 2);
        if (keyvalue.length != 2) {
            continue;
        }
        string value = keyvalue[1];
        string decodedvalue;
        try {
            decodedvalue = urldecoder.decode(value, standardcharsets.utf_8);
        } catch (illegalargumentexception ignored) {
            // The value was given raw and contains a '%' that is not an escape; keep it as is.
            decodedvalue = value;
        }
        urlencodedata.put(keyvalue[0], decodedvalue);
    }
    return urlencodedata;
}
/**
 * Parses the payload of a raw data option. XML payloads are converted to JSON; everything else
 * must be a JSON object.
 *
 * @param rowmatcher a matcher of {@code http_row_body_pattern} positioned on a match
 * @return the parsed body
 * @throws illegalargumentexception if the payload is neither XML nor valid JSON; the parser's
 *     own exception is attached as the cause
 */
private jsonobject parserowbody(matcher rowmatcher) {
    string rawdata = rowmatcher.group(1);
    if (isxml(rawdata)) {
        return xml2json(rawdata);
    }
    try {
        return json.parseobject(rawdata);
    } catch (exception e) {
        // keep the original failure so the caller can see where the JSON is malformed
        throw new illegalargumentexception("curl --data-raw content is not a valid json", e);
    }
}
/**
 * Tells whether the text can be parsed by {@code jsonobject.parseobject}.
 *
 * @param jsonstr the candidate text
 * @return {@code true} when parsing completes without an exception
 */
private boolean isjson(string jsonstr) {
    boolean parsable = true;
    try {
        jsonobject.parseobject(jsonstr);
    } catch (exception ignored) {
        // any parser failure simply means "not JSON"
        parsable = false;
    }
    return parsable;
}
/**
 * Tells whether the text is well-formed XML by attempting to parse it.
 *
 * <p>The parser factory is configured with the feature URIs from {@code securityconstants}:
 * {@code ddd} is switched on, {@code ege} and {@code epe} are switched off.
 *
 * @param xmlstr the candidate text
 * @return {@code true} when the parse completes; {@code false} on any exception, including a
 *     failure to configure or create the parser
 */
public static boolean isxml(string xmlstr) {
try {
documentbuilderfactory factory = documentbuilderfactory.newinstance();
// feature toggles applied before the builder is created
factory.setfeature(securityconstants.ddd, true);
factory.setfeature(securityconstants.ege, false);
factory.setfeature(securityconstants.epe, false);
documentbuilder builder = factory.newdocumentbuilder();
inputsource is = new inputsource(new stringreader(xmlstr));
// the parsed document is discarded; only success or failure matters
builder.parse(is);
return true;
} catch (exception e) {
return false;
}
}
/**
 * Converts XML text to a JSON object by going through {@code xml.tojsonobject} and re-parsing
 * its string form.
 *
 * @param xmlstr the XML text
 * @return the converted object
 * @throws linkconsoleexception if the conversion raises a {@code jsonexception}
 */
private jsonobject xml2json(string xmlstr) {
    try {
        return json.parseobject(xml.tojsonobject(xmlstr).tostring());
    } catch (jsonexception e) {
        throw new linkconsoleexception("curl --data-raw content xml2json error", e);
    }
}
/**
 * Reports the parsed body via {@code logprinter.info}.
 *
 * @param logparams values handed to the logger together with the message template
 */
@override
public void log(object... logparams) {
logprinter.info("httpbodyhandler execute: body={}", logparams);
}
}
curlpatternconstants
正则匹配常量定义:
public interface curlpatternconstants {
/**
 * Basic shape check for a curl command: optional leading whitespace, the word {@code curl},
 * whitespace, and at least one argument (captured in group 1).
 *
 * <p>The argument is matched with {@code \S+}. The earlier {@code \s+} demanded extra
 * whitespace after {@code "curl "} and therefore rejected ordinary commands such as
 * {@code curl --location ...}.
 */
pattern curl_basic_structure_pattern = pattern.compile("^\\s*curl\\s+(\\S+)");
/**
 * Matches an http/https URL, optionally wrapped in single or double quotes and delimited by
 * whitespace or the start/end of the text. Group 1 (the only capturing group) is the URL up to,
 * but not including, the {@code ?}; the query string is matched but not captured.
 */
pattern url_path_pattern =
pattern.compile("(?:\\s|^)(?:'|\")?(https?://[^?\\s'\"]*)(?:\\?[^\\s'\"]*)?(?:'|\")?(?:\\s|$)");
/**
 * Matches an http/https URL, optionally wrapped in single or double quotes and delimited by
 * whitespace or the start/end of the text. Group 1 is the whole URL including any query string.
 */
pattern url_params_pattern = pattern.compile("(?:\\s|^)(?:'|\")?(https?://[^\\s'\"]+)(?:'|\")?(?:\\s|$)");
/**
 * Matches an explicit request-method option; group 1 is the method name.
 *
 * <p>Recognises {@code -X} (with or without a following space) and {@code --request} anywhere in
 * the command, with the method optionally quoted. The method name is matched case-insensitively
 * and limited to get, post, put and delete. The option letter itself stays case-sensitive
 * because lower-case {@code -x} is curl's proxy option.
 */
pattern http_method_pattern =
    pattern.compile("(?:^|\\s)(?:-X\\s*|--request\\s+)['\"]?((?i:get|post|put|delete))['\"]?(?=\\s|$)");
/**
 * Detects an option that makes curl send a request body, which implies post when no method is
 * given explicitly. Group 1 is the option found.
 *
 * <p>Covers {@code -d}, {@code --data} and its {@code -raw}, {@code -binary},
 * {@code -urlencode}, {@code -ascii} variants, {@code --json}, and the form options {@code -F},
 * {@code --form}, {@code --form-string}. Intended for {@code find()}, so no surrounding
 * {@code .*} is needed.
 */
pattern default_http_method_pattern = pattern.compile(
    "(?:^|\\s)(-d|--data(?:-raw|-binary|-urlencode|-ascii)?|--json|-F|--form(?:-string)?)(?=\\s|['\"])");
/**
 * Matches one single-quoted header option; group 1 is the {@code name: value} text.
 *
 * <p>The short option is upper-case {@code -H}; lower-case {@code -h} is curl's help option and
 * must not be read as a header.
 */
pattern curl_headers_pattern = pattern.compile("(?:-H|--header)\\s+'(.*?:.*?)'");
/**
 * Matches the payload of {@code -d} / {@code --data}. Exactly one group is set per match:
 * group 1 for a single-quoted payload, group 2 for a double-quoted payload, group 3 for an
 * unquoted token.
 *
 * <p>The unquoted alternative uses {@code \S+}; with {@code \s+} it captured the whitespace
 * after the option instead of the payload.
 */
pattern default_http_body_pattern = pattern.compile("(?:--data|-d)\\s+(?:'([^']*)'|\"([^\"]*)\"|(\\S+))", pattern.dotall);
/**
 * Matches a whole string made of {@code key=value} pairs joined by {@code &}. Keys and values
 * must be non-empty and contain neither {@code =} nor {@code &}.
 */
pattern default_http_body_pattern_kv = pattern.compile("^([^=&]+=[^=&]+)(?:&[^=&]+=[^=&]+)*$", pattern.dotall);
/**
 * Matches the single-quoted payload of {@code --data-raw}; group 1 is the payload up to the next
 * single quote. DOTALL lets the payload span line breaks.
 *
 * <p>Any amount of whitespace may separate the option from the payload, as with the other body
 * patterns. The trailing inline {@code (?s)} was dropped because the DOTALL flag already
 * applies to the whole expression.
 */
pattern http_row_body_pattern = pattern.compile("--data-raw\\s+'(.+?)'", pattern.dotall);
/**
 * Matches one single-quoted form option. Group 1 is set for {@code --form}, group 2 for the
 * short option.
 *
 * <p>The short option is upper-case {@code -F}; lower-case {@code -f} is curl's
 * {@code --fail} option and must not be read as form data.
 */
pattern http_from_body_pattern = pattern.compile("--form\\s+'(.*?)'|-F\\s+'(.*?)'");
/**
 * Matches one single-quoted {@code --data-urlencode} option; group 1 is the quoted text.
 */
pattern http_urlencode_body_pattern = pattern.compile("--data-urlencode\\s+'(.*?)'");
}
其他代码
public class securityconstants {
// XML parser feature URIs used to harden XML parsing. Declared final so that no other code can
// reassign them at runtime.
/** Feature URI: disallow DOCTYPE declarations. */
public static final string ddd = "http://apache.org/xml/features/disallow-doctype-decl";
/** Feature URI: external general entities. */
public static final string ege = "http://xml.org/sax/features/external-general-entities";
/** Feature URI: external parameter entities. */
public static final string epe = "http://xml.org/sax/features/external-parameter-entities";
/** Feature URI: loading of external DTDs by non-validating parsers. */
public static final string led = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
}
以上就是使用java实现一个解析curl脚本小工具的详细内容,更多关于java实现解析curl工具的资料请关注代码网其它相关文章!
发表评论