版本 | 时间 | 修改内容 |
v1 | 2024.06.13 | 新建 |
v2 | 2024.06.28 | 更新body和请求类型筛选的正则表达式内容,剔除特殊换行符和转义符 |
该工具可以将curl脚本中的header解析为kv map结构;获取url路径、请求类型;解析url参数列表;解析body请求体:form表单、raw body、kv body、xml/json/text结构体等。
使用示例
获取一个http curl脚本:
curl --location --request post 'https://cainiao-inc.com?param_1=value_1&param_2=value_2' \ --header 'cookie: user_cookie' \ --header 'content-type: application/json' \ --data-raw '{ "appname": "link", "apiid": "test_api", "content": { "address": "cainiao home", "city": "hangzhou" } }'
执行解析例子:
实现原理
实现原理很简单:基于java正则 + 责任链设计模式,按照curl脚本的常见语法去匹配、解析即可~
按照curl语法结构,可以将其拆分为 5 个部分:
- url路径:http://cainiao.com
- url参数列表:?param_1=value_1&param_2=value_2
- 请求方法类型:例如 post、get、delete、put......需要正则匹配 `-X`、`--request` 等标识符
- header请求头:例如 cookie、token、content-type......需要正则匹配 `-H`、`--header` 等标识符
- body请求体:可以分为 form-data(`--form`/`-F`)、`--data-raw`、`--data-urlencode`、`-d`、`--data`、kvbody 等。格式可能包含json、xml、文本、kv键值对,二进制流(暂不支持解析)等等。
具体实现
流程简图:
类关系图:
curlparserutil
curl解析工具类:
public class curlparserutil { /** * 该方法是用来解析curl的入口。 * * @param curl 输入的curl文本字符串 * @return 返回解析后生成的curl实体对象 */ public static curlentity parse(string curl) { curlentity entity = curlentity.builder().build(); icurlhandler<curlentity, string> handlerchain = curlhandlerchain.init(); // 如需扩展其他解析器,继续往链表中add即可 handlerchain.next(new urlpathhandler()) .next(new urlparamshandler()) .next(new httpmethodhandler()) .next(new headerhandler()) .next(new httpbodyhandler()); handlerchain.handle(entity, curl); return entity; } }
curlentity
解析后得到的curl实体类(这里分了5个部分)
/**
 * Result of parsing a curl script, split into five parts: URL, request
 * method, URL parameters, headers and body.
 *
 * NOTE(review): the annotations look like Lombok's data/builder annotations
 * after case-folding - confirm against the original source.
 */
@data @builder public class curlentity {
    /** URL path. */
    private string url;
    /** Request method type. */
    private method method;
    /** URL (query string) parameters. */
    private map<string, string> urlparams;
    /** Header parameters. */
    private map<string, string> headers;
    /** Request body. */
    private jsonobject body;

    /** Request methods this entity can carry. */
    public enum method { get, post, put, delete }
}
icurlhandler
责任链链表结构定义:
/**
 * One link in a chain-of-responsibility.
 *
 * The original declaration introduced the type parameters but then hard-coded
 * the entity and input types in both method signatures, leaving the parameters
 * unused. The signatures now use them; for the existing
 * {@code icurlhandler<curlentity, string>} implementations the resulting
 * method signatures are unchanged.
 *
 * @param <r> type of the result object the handlers fill in
 * @param <s> type of the input the handlers read from
 */
public interface icurlhandler<r, s> {

    /**
     * Registers the handler that follows this one.
     *
     * @param handler the successor
     * @return a handler on which the next link can be registered
     */
    icurlhandler<r, s> next(icurlhandler<r, s> handler);

    /**
     * Processes the input and records what was found on the entity.
     *
     * @param entity the result object to populate
     * @param curl   the input being parsed
     */
    void handle(r entity, s curl);
}
curlhandlerchain
责任链载体:
public abstract class curlhandlerchain implements icurlhandler<curlentity, string> { icurlhandler<curlentity, string> next; @override public icurlhandler<curlentity, string> next(icurlhandler<curlentity, string> handler) { this.next = handler; return this.next; } @override public abstract void handle(curlentity entity, string curl); /** * for subclass call */ protected void nexthandle(curlentity curlentity, string curl) { if (next != null) { next.handle(curlentity, curl); } } protected void validate(string curl) { if (stringutils.isblank(curl)) { throw new illegalargumentexception("curl script is empty"); } matcher matcher = curl_basic_structure_pattern.matcher(curl); if (!matcher.find()) { throw new illegalargumentexception("curl script is invalid"); } } public static curlhandlerchain init() { return new curlhandlerchain() { @override public void handle(curlentity entity, string curl) { this.validate(curl); // 替换掉可能存在的转译(字符串中的空白字符,包括空格、换行符和制表符...) curl = curl.replace("\\", "") .replace("\n", "") .replace("\t", ""); if (next != null) { next.handle(entity, curl); } } }; } public void log(object... logparams) { // write log for subclass extensions } }
urlpathhandler
url路径解析:
public class urlpathhandler extends curlhandlerchain { @override public void handle(curlentity entity, string curl) { string url = parseurlpath(curl); entity.seturl(url); this.log(url); super.nexthandle(entity, curl); } /** * 该方法用于解析url路径。 * * @param curl 需要解析的url,以字符串形式给出 * @return url中的路径部分。如果找不到,将返回null */ private string parseurlpath(string curl) { matcher matcher = curlpatternconstants.url_path_pattern.matcher(curl); if (matcher.find()) { return matcher.group(1) != null ? matcher.group(1) : matcher.group(3); } return null; } @override public void log(object... logparams) { logprinter.info("urlpathhandler execute: url={}", logparams); } }
httpmethodhandler
请求类型解析:
public class httpmethodhandler extends curlhandlerchain { @override public void handle(curlentity entity, string curl) { curlentity.method method = parsemethod(curl); entity.setmethod(method); this.log(method); super.nexthandle(entity, curl); } private curlentity.method parsemethod(string curl) { matcher matcher = curlpatternconstants.http_method_pattern.matcher(curl); matcher defaultmatcher = curlpatternconstants.default_http_method_pattern.matcher(curl); if (matcher.find()) { string method = matcher.group(1); return curlentity.method.valueof(method.touppercase()); } else if (defaultmatcher.find()) { // 如果命令中包含 -d 或 --data,没有明确请求方法,默认为 post return curlentity.method.post; } else { // 没有明确指定请求方法,默认为 get return curlentity.method.get; } } @override public void log(object... logparams) { logprinter.info("httpmethodhandler execute: method={}", logparams); } }
urlparamshandler
url参数列表解析:
public class urlparamshandler extends curlhandlerchain { @override public void handle(curlentity entity, string curl) { string url = extracturl(curl); map<string, string> urlparams = parseurlparams(url); entity.seturlparams(urlparams); this.log(urlparams); super.nexthandle(entity, curl); } private string extracturl(string curl) { matcher matcher = curlpatternconstants.url_params_pattern.matcher(curl); if (matcher.find()) { return matcher.group(1); } return null; } private map<string, string> parseurlparams(string url) { if (stringutils.isblank(url)) { return collections.emptymap(); } map<string, string> urlparams = new hashmap<>(); // 提取url的查询参数部分 string[] urlparts = url.split("\\?"); if (urlparts.length > 1) { // 只处理存在查询参数的情况 string query = urlparts[1]; // 解析查询参数到map string[] pairs = query.split("&"); for (string pair : pairs) { int idx = pair.indexof("="); if (idx != -1 && idx < pair.length() - 1) { string key = pair.substring(0, idx); string value = pair.substring(idx + 1); urlparams.put(key, value); } else { // 存在无值的参数时 urlparams.put(pair, null); } } } return urlparams; } @override public void log(object... logparams) { logprinter.info("urlparamshandler execute: urlparams={}", logparams); } }
headerhandler
http header解析:
public class headerhandler extends curlhandlerchain{ @override public void handle(curlentity entity, string curl) { map<string, string> headers = parseheaders(curl); entity.setheaders(headers); this.log(headers); super.nexthandle(entity, curl); } private map<string, string> parseheaders(string curl) { if (stringutils.isblank(curl)) { return collections.emptymap(); } matcher matcher = curlpatternconstants.curl_headers_pattern.matcher(curl); map<string, string> headers = new hashmap<>(); while (matcher.find()) { string header = matcher.group(1); string[] headerkeyvalue = header.split(":", 2); if (headerkeyvalue.length == 2) { // 去除键和值的首尾空白字符 headers.put(headerkeyvalue[0].trim(), headerkeyvalue[1].trim()); } } return headers; } @override public void log(object... logparams) { logprinter.info("headerhandler execute: headers={}", logparams); } }
httpbodyhandler
request body请求体解析:
- form-data/-form
- data-urlencode
- data-raw
- default/-d/--data
格式可能包含json、xml、文本、kv键值对,二进制流(暂不支持解析)等等。
public class httpbodyhandler extends curlhandlerchain { @override public void handle(curlentity entity, string curl) { jsonobject body = parsebody(curl); entity.setbody(body); this.log(body); super.nexthandle(entity, curl); } private jsonobject parsebody(string curl) { matcher formmatcher = curlpatternconstants.http_from_body_pattern.matcher(curl); if (formmatcher.find()) { return parseformbody(formmatcher); } matcher urlencodematcher = curlpatternconstants.http_urlencode_body_pattern.matcher(curl); if (urlencodematcher.find()) { return parseurlencodebody(urlencodematcher); } matcher rawmatcher = curlpatternconstants.http_row_body_pattern.matcher(curl); if (rawmatcher.find()) { return parserowbody(rawmatcher); } matcher defaultmatcher = curlpatternconstants.default_http_body_pattern.matcher(curl); if (defaultmatcher.find()) { return parsedefaultbody(defaultmatcher); } return new jsonobject(); } private jsonobject parsedefaultbody(matcher defaultmatcher) { string bodystr = ""; if (defaultmatcher.group(1) != null) { // 单引号包裹的数据 bodystr = defaultmatcher.group(1); } else if (defaultmatcher.group(2) != null) { // 双引号包裹的数据 bodystr = defaultmatcher.group(2); } else { // 无引号的数据 bodystr = defaultmatcher.group(3); } // 判断是否是json结构 if (isjson(bodystr)) { return jsonobject.parseobject(bodystr); } // 特殊case: username=test&password=secret matcher kvmatcher = curlpatternconstants.default_http_body_pattern_kv.matcher(bodystr); return kvmatcher.matches() ? 
parsekvbody(bodystr) : new jsonobject(); } private jsonobject parsekvbody(string kvbodystr) { jsonobject json = new jsonobject(); string[] pairs = kvbodystr.split("&"); for (string pair : pairs) { int idx = pair.indexof("="); string key = urldecoder.decode(pair.substring(0, idx), standardcharsets.utf_8); string value = urldecoder.decode(pair.substring(idx + 1), standardcharsets.utf_8); json.put(key, value); } return json; } private jsonobject parseformbody(matcher formmatcher) { jsonobject formdata = new jsonobject(); // 重置指针匹配的位置 formmatcher.reset(); while (formmatcher.find()) { // 提取表单项 string formitem = formmatcher.group(1) != null ? formmatcher.group(1) : formmatcher.group(2); // 分割键和值 string[] keyvalue = formitem.split("=", 2); if (keyvalue.length == 2) { string key = keyvalue[0]; string value = keyvalue[1]; // 检测文件字段标记 // ps: 理论上文件标记字段不需要支持 if (value.startswith("@")) { // 只提取文件名,不读取文件内容 formdata.put(key, value.substring(1)); } else { // 放入表单数据 formdata.put(key, value); } } } return formdata; } private jsonobject parseurlencodebody(matcher urlencodematcher) { jsonobject urlencodedata = new jsonobject(); // 重置指针匹配的位置 urlencodematcher.reset(); while (urlencodematcher.find()) { // 提取键值对字符串 string keyvalueencoded = urlencodematcher.group(1); // 分隔键和值 string[] keyvalue = keyvalueencoded.split("=", 2); if (keyvalue.length == 2) { string key = keyvalue[0]; string value = keyvalue[1]; // 对值进行url解码 string decodedvalue = urldecoder.decode(value, standardcharsets.utf_8); // 存入数据到json对象 urlencodedata.put(key, decodedvalue); } } return urlencodedata; } private jsonobject parserowbody(matcher rowmatcher) { string rawdata = rowmatcher.group(1); if (isxml(rawdata)) { // throw new illegalargumentexception("curl --data-raw content cant' be xml"); return xml2json(rawdata); } try { return json.parseobject(rawdata); } catch (exception e) { throw new illegalargumentexception("curl --data-raw content is not a valid json"); } } private boolean isjson(string jsonstr) { try { 
jsonobject.parseobject(jsonstr); return true; } catch (exception e) { return false; } } public static boolean isxml(string xmlstr) { try { documentbuilderfactory factory = documentbuilderfactory.newinstance(); factory.setfeature(securityconstants.ddd, true); factory.setfeature(securityconstants.ege, false); factory.setfeature(securityconstants.epe, false); documentbuilder builder = factory.newdocumentbuilder(); inputsource is = new inputsource(new stringreader(xmlstr)); builder.parse(is); return true; } catch (exception e) { return false; } } private jsonobject xml2json(string xmlstr) { try { org.json.jsonobject orgjsonobj = xml.tojsonobject(xmlstr); string jsonstring = orgjsonobj.tostring(); return json.parseobject(jsonstring); } catch (jsonexception e) { throw new linkconsoleexception("curl --data-raw content xml2json error", e); } } @override public void log(object... logparams) { logprinter.info("httpbodyhandler execute: body={}", logparams); } }
curlpatternconstants
正则匹配常量定义:
public interface curlpatternconstants { /** * curl基本结构校验 */ pattern curl_basic_structure_pattern = pattern.compile("^curl (\\s+)"); /** * url路径匹配 */ pattern url_path_pattern = pattern.compile("(?:\\s|^)(?:'|\")?(https?://[^?\\s'\"]*)(?:\\?[^\\s'\"]*)?(?:'|\")?(?:\\s|$)"); /** * 请求参数列表匹配 */ pattern url_params_pattern = pattern.compile("(?:\\s|^)(?:'|\")?(https?://[^\\s'\"]+)(?:'|\")?(?:\\s|$)"); /** * http请求方法匹配 */ //pattern http_method_pattern = pattern.compile("(?:-x|--request)\\s+(\\s+)"); pattern http_method_pattern = pattern.compile("curl\\s+[^\\s]*\\s+(?:-x|--request)\\s+'?(get|post)'?"); /** * 默认http请求方法匹配 */ pattern default_http_method_pattern = pattern.compile(".*\\s(-d|--data|--data-binary)\\s.*"); /** * 请求头匹配 */ pattern curl_headers_pattern = pattern.compile("(?:-h|--header)\\s+'(.*?:.*?)'"); /** * -d/--data 请求体匹配 */ pattern default_http_body_pattern = pattern.compile("(?:--data|-d)\\s+(?:'([^']*)'|\"([^\"]*)\"|(\\s+))", pattern.dotall); pattern default_http_body_pattern_kv = pattern.compile("^([^=&]+=[^=&]+)(?:&[^=&]+=[^=&]+)*$", pattern.dotall); /** * --data-raw 请求体匹配 */ pattern http_row_body_pattern = pattern.compile("--data-raw '(.+?)'(?s)", pattern.dotall); /** * --form 请求体匹配 */ pattern http_from_body_pattern = pattern.compile("--form\\s+'(.*?)'|-f\\s+'(.*?)'"); /** * --data-urlencode 请求体匹配 */ pattern http_urlencode_body_pattern = pattern.compile("--data-urlencode\\s+'(.*?)'"); }
其他代码
/**
 * XML parser feature names used to harden XML parsing. The fields are now
 * {@code final}, so these shared constants cannot be reassigned at runtime.
 */
public class securityconstants {

    /** Feature that, when enabled, rejects DOCTYPE declarations. */
    public static final string ddd = "http://apache.org/xml/features/disallow-doctype-decl";

    /** Feature controlling external general entities. */
    public static final string ege = "http://xml.org/sax/features/external-general-entities";

    /** Feature controlling external parameter entities. */
    public static final string epe = "http://xml.org/sax/features/external-parameter-entities";

    /** Feature controlling loading of external DTDs. */
    public static final string led = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
}
以上就是使用java实现一个解析curl脚本小工具的详细内容,更多关于java实现解析curl工具的资料请关注代码网其它相关文章!
发表评论