当前位置: 代码网 > it编程>编程语言>Java > 使用Java实现一个解析CURL脚本小工具

使用Java实现一个解析CURL脚本小工具

2025年02月14日 Java 我要评论
版本记录:v1(2024.06.13)新建;v2(2024.06.28)更新 body 和请求类型筛选的正则表达式内容,剔除特殊换行符和转义符。该工具可以将 curl 脚本中的 header 解析为 kv map 结构;获取 url 路径、请求类型;解析 url 参数列表;解析 body 请求体。

版本

时间

修改内容

v1

2024.06.13

新建

v2

2024.06.28

更新body和请求类型筛选的正则表达式内容,特殊换行符和转义符剔除

该工具可以将curl脚本中的header解析为kv map结构;获取url路径、请求类型;解析url参数列表;解析body请求体:form表单、raw body、kv body、xml/json/text结构体等。

使用示例

获取一个http curl脚本:

# Sample request used throughout the article. The method token is upper-case
# because HTTP method names are case-sensitive and curl sends --request verbatim.
curl --location --request POST 'https://cainiao-inc.com?param_1=value_1&param_2=value_2' \
--header 'cookie: user_cookie' \
--header 'content-type: application/json' \
--data-raw '{
    "appname": "link",
    "apiid": "test_api",
    "content": {
        "address": "cainiao home",
        "city": "hangzhou"
    }
}'

执行解析例子:

实现原理

实现原理很简单:基于java正则 + 责任链设计模式,按照curl脚本的常见语法去匹配、解析即可~

按照curl语法结构,可以将其拆分为 5 个部分:

  • url路径:http://cainiao.com
  • url参数列表:?param_1=value_1&param_2=value_2
  • 请求方法类型: 例如 POST、GET、DELETE、PUT...... 需要正则匹配 -X、--request 等标识符
  • header请求头:例如 Cookie、Token、Content-Type...... 需要正则匹配 -H、--header 等标识符
  • body请求体:可以分为 --form/-F(form-data)、--data-raw、--data-urlencode、-d/--data(kv body)等。格式可能包含json、xml、文本、kv键值对,二进制流(暂不支持解析)等等。

具体实现

流程简图:

类关系图:

curlparserutil

curl解析工具类:

/**
 * Entry point for parsing a curl script into a {@link CurlEntity}.
 *
 * <p>Parsing is delegated to a chain of handlers, each of which fills in one part of the entity
 * and then passes the script on to the next handler.
 */
public final class CurlParserUtil {

    private CurlParserUtil() {
        // Static utility class; not meant to be instantiated.
    }

    /**
     * Parses a curl script.
     *
     * @param curl the curl script text
     * @return the entity populated by the handler chain
     * @throws IllegalArgumentException if the head of the chain rejects the script as blank or
     *     as not starting with a curl command
     */
    public static CurlEntity parse(String curl) {
        CurlEntity entity = CurlEntity.builder().build();
        ICurlHandler<CurlEntity, String> handlerChain = CurlHandlerChain.init();

        // To support additional parsers, append them to the chain here.
        handlerChain.next(new UrlPathHandler())
                .next(new UrlParamsHandler())
                .next(new HttpMethodHandler())
                .next(new HeaderHandler())
                .next(new HttpBodyHandler());

        handlerChain.handle(entity, curl);
        return entity;
    }
}

curlentity

解析后得到的curl实体类(这里分了5个部分)

/**
 * Result of parsing a curl script, split into five parts: URL path, request method,
 * URL parameters, headers and request body.
 *
 * <p>Accessors and the builder are generated by Lombok.
 */
@Data
@Builder
public class CurlEntity {
    /**
     * URL path.
     */
    private String url;

    /**
     * Request method type.
     */
    private Method method;

    /**
     * URL query parameters.
     */
    private Map<String, String> urlParams;

    /**
     * Header parameters.
     */
    private Map<String, String> headers;

    /**
     * Request body.
     */
    private JSONObject body;

    /**
     * HTTP request methods this entity can carry.
     */
    public enum Method {
        GET,
        POST,
        PUT,
        DELETE
    }
}

icurlhandler

责任链链表结构定义:

/**
 * Link in a chain-of-responsibility list of handlers.
 *
 * @param <R> type of the result object each handler fills in
 * @param <S> type of the source being parsed
 */
public interface ICurlHandler<R, S> {

    /**
     * Registers the handler that follows this one.
     *
     * @param handler the handler to run after this one
     * @return the handler that was registered, so further calls can be chained onto it
     */
    ICurlHandler<R, S> next(ICurlHandler<R, S> handler);

    /**
     * Processes the source and records the outcome on the entity.
     *
     * @param entity the result object to populate
     * @param curl the source to parse
     */
    void handle(R entity, S curl);
}

curlhandlerchain

责任链载体:

public abstract class curlhandlerchain implements icurlhandler<curlentity, string> {
 
    icurlhandler<curlentity, string> next;
 
    @override
    public icurlhandler<curlentity, string> next(icurlhandler<curlentity, string> handler) {
        this.next = handler;
        return this.next;
    }
 
    @override
    public abstract void handle(curlentity entity, string curl);
 
    /**
     * for subclass call
     */
    protected void nexthandle(curlentity curlentity, string curl) {
        if (next != null) {
            next.handle(curlentity, curl);
        }
    }
 
    protected void validate(string curl) {
        if (stringutils.isblank(curl)) {
            throw new illegalargumentexception("curl script is empty");
        }
 
        matcher matcher = curl_basic_structure_pattern.matcher(curl);
        if (!matcher.find()) {
            throw new illegalargumentexception("curl script is invalid");
        }
    }
 
    public static curlhandlerchain init() {
        return new curlhandlerchain() {
            @override
            public void handle(curlentity entity, string curl) {
                this.validate(curl);
 
                // 替换掉可能存在的转译(字符串中的空白字符,包括空格、换行符和制表符...)
                curl = curl.replace("\\", "")
                        .replace("\n", "")
                        .replace("\t", "");
 
                if (next != null) {
                    next.handle(entity, curl);
                }
            }
        };
    }
 
    public void log(object... logparams) {
        // write log for subclass extensions
    }
}

urlpathhandler

url路径解析:

public class urlpathhandler extends curlhandlerchain {
 
    @override
    public void handle(curlentity entity, string curl) {
        string url = parseurlpath(curl);
        entity.seturl(url);
 
        this.log(url);
        super.nexthandle(entity, curl);
    }
 
    /**
     * 该方法用于解析url路径。
     *
     * @param curl 需要解析的url,以字符串形式给出
     * @return url中的路径部分。如果找不到,将返回null
     */
    private string parseurlpath(string curl) {
        matcher matcher = curlpatternconstants.url_path_pattern.matcher(curl);
        if (matcher.find()) {
            return matcher.group(1) != null ? matcher.group(1) : matcher.group(3);
        }
        return null;
    }
 
    @override
    public void log(object... logparams) {
        logprinter.info("urlpathhandler execute: url={}", logparams);
    }
}

httpmethodhandler

请求类型解析:

public class httpmethodhandler extends curlhandlerchain {
 
    @override
    public void handle(curlentity entity, string curl) {
        curlentity.method method = parsemethod(curl);
        entity.setmethod(method);
 
        this.log(method);
        super.nexthandle(entity, curl);
    }
 
    private curlentity.method parsemethod(string curl) {
        matcher matcher = curlpatternconstants.http_method_pattern.matcher(curl);
        matcher defaultmatcher = curlpatternconstants.default_http_method_pattern.matcher(curl);
        if (matcher.find()) {
            string method = matcher.group(1);
            return curlentity.method.valueof(method.touppercase());
        } else if (defaultmatcher.find()) {
            // 如果命令中包含 -d 或 --data,没有明确请求方法,默认为 post
            return curlentity.method.post;
        } else {
            // 没有明确指定请求方法,默认为 get
            return curlentity.method.get;
        }
    }
 
    @override
    public void log(object... logparams) {
        logprinter.info("httpmethodhandler execute: method={}", logparams);
    }
}

urlparamshandler

url参数列表解析:

public class urlparamshandler extends curlhandlerchain {
 
    @override
    public void handle(curlentity entity, string curl) {
        string url = extracturl(curl);
        map<string, string> urlparams = parseurlparams(url);
        entity.seturlparams(urlparams);
 
        this.log(urlparams);
        super.nexthandle(entity, curl);
    }
 
    private string extracturl(string curl) {
        matcher matcher = curlpatternconstants.url_params_pattern.matcher(curl);
        if (matcher.find()) {
            return matcher.group(1);
        }
        return null;
    }
 
    private map<string, string> parseurlparams(string url) {
        if (stringutils.isblank(url)) {
            return collections.emptymap();
        }
 
        map<string, string> urlparams = new hashmap<>();
        // 提取url的查询参数部分
        string[] urlparts = url.split("\\?");
        if (urlparts.length > 1) {
            // 只处理存在查询参数的情况
            string query = urlparts[1];
            // 解析查询参数到map
            string[] pairs = query.split("&");
            for (string pair : pairs) {
                int idx = pair.indexof("=");
                if (idx != -1 && idx < pair.length() - 1) {
                    string key = pair.substring(0, idx);
                    string value = pair.substring(idx + 1);
                    urlparams.put(key, value);
                } else {
                    // 存在无值的参数时
                    urlparams.put(pair, null);
                }
            }
        }
        return urlparams;
    }
 
    @override
    public void log(object... logparams) {
        logprinter.info("urlparamshandler execute: urlparams={}", logparams);
    }
}

headerhandler

http header解析:

public class headerhandler extends curlhandlerchain{
    
    @override
    public void handle(curlentity entity, string curl) {
        map<string, string> headers = parseheaders(curl);
        entity.setheaders(headers);
 
        this.log(headers);
        super.nexthandle(entity, curl);
    }
 
    private map<string, string> parseheaders(string curl) {
        if (stringutils.isblank(curl)) {
            return collections.emptymap();
        }
 
        matcher matcher = curlpatternconstants.curl_headers_pattern.matcher(curl);
        map<string, string> headers = new hashmap<>();
        while (matcher.find()) {
            string header = matcher.group(1);
            string[] headerkeyvalue = header.split(":", 2);
            if (headerkeyvalue.length == 2) {
                // 去除键和值的首尾空白字符
                headers.put(headerkeyvalue[0].trim(), headerkeyvalue[1].trim());
            }
        }
 
        return headers;
    }
 
    @override
    public void log(object... logparams) {
        logprinter.info("headerhandler execute: headers={}", logparams);
    }
}

httpbodyhandler

request body请求体解析:

  • --form / -F(form-data)
  • --data-urlencode
  • --data-raw
  • -d / --data(默认)

格式可能包含json、xml、文本、kv键值对,二进制流(暂不支持解析)等等。

public class httpbodyhandler extends curlhandlerchain {
    @override
    public void handle(curlentity entity, string curl) {
        jsonobject body = parsebody(curl);
        entity.setbody(body);
 
        this.log(body);
        super.nexthandle(entity, curl);
    }
 
    private jsonobject parsebody(string curl) {
        matcher formmatcher = curlpatternconstants.http_from_body_pattern.matcher(curl);
        if (formmatcher.find()) {
            return parseformbody(formmatcher);
        }
 
        matcher urlencodematcher = curlpatternconstants.http_urlencode_body_pattern.matcher(curl);
        if (urlencodematcher.find()) {
            return parseurlencodebody(urlencodematcher);
        }
 
        matcher rawmatcher = curlpatternconstants.http_row_body_pattern.matcher(curl);
        if (rawmatcher.find()) {
            return parserowbody(rawmatcher);
        }
 
        matcher defaultmatcher = curlpatternconstants.default_http_body_pattern.matcher(curl);
        if (defaultmatcher.find()) {
            return parsedefaultbody(defaultmatcher);
        }
 
        return new jsonobject();
    }
 
    private jsonobject parsedefaultbody(matcher defaultmatcher) {
        string bodystr = "";
        if (defaultmatcher.group(1) != null) {
            // 单引号包裹的数据
            bodystr = defaultmatcher.group(1);
        } else if (defaultmatcher.group(2) != null) {
            // 双引号包裹的数据
            bodystr = defaultmatcher.group(2);
        } else {
            // 无引号的数据
            bodystr = defaultmatcher.group(3);
        }
 
        // 判断是否是json结构
        if (isjson(bodystr)) {
            return jsonobject.parseobject(bodystr);
        }
 
        // 特殊case: username=test&password=secret
        matcher kvmatcher = curlpatternconstants.default_http_body_pattern_kv.matcher(bodystr);
        return kvmatcher.matches() ? parsekvbody(bodystr) : new jsonobject();
    }
 
    private jsonobject parsekvbody(string kvbodystr) {
        jsonobject json = new jsonobject();
        string[] pairs = kvbodystr.split("&");
        for (string pair : pairs) {
            int idx = pair.indexof("=");
            string key = urldecoder.decode(pair.substring(0, idx), standardcharsets.utf_8);
            string value = urldecoder.decode(pair.substring(idx + 1), standardcharsets.utf_8);
            json.put(key, value);
        }
        return json;
    }
 
    private jsonobject parseformbody(matcher formmatcher) {
        jsonobject formdata = new jsonobject();
 
        // 重置指针匹配的位置
        formmatcher.reset();
        while (formmatcher.find()) {
            // 提取表单项
            string formitem = formmatcher.group(1) != null ? formmatcher.group(1) : formmatcher.group(2);
 
            // 分割键和值
            string[] keyvalue = formitem.split("=", 2);
            if (keyvalue.length == 2) {
                string key = keyvalue[0];
                string value = keyvalue[1];
 
                // 检测文件字段标记
                // ps: 理论上文件标记字段不需要支持
                if (value.startswith("@")) {
                    // 只提取文件名,不读取文件内容
                    formdata.put(key, value.substring(1));
                } else {
                    // 放入表单数据
                    formdata.put(key, value);
                }
            }
        }
 
        return formdata;
    }
 
    private jsonobject parseurlencodebody(matcher urlencodematcher) {
        jsonobject urlencodedata = new jsonobject();
 
        // 重置指针匹配的位置
        urlencodematcher.reset();
        while (urlencodematcher.find()) {
            // 提取键值对字符串
            string keyvalueencoded = urlencodematcher.group(1);
 
            // 分隔键和值
            string[] keyvalue = keyvalueencoded.split("=", 2);
            if (keyvalue.length == 2) {
                string key = keyvalue[0];
                string value = keyvalue[1];
 
                // 对值进行url解码
                string decodedvalue = urldecoder.decode(value, standardcharsets.utf_8);
 
                // 存入数据到json对象
                urlencodedata.put(key, decodedvalue);
            }
        }
 
        return urlencodedata;
    }
 
    private jsonobject parserowbody(matcher rowmatcher) {
        string rawdata = rowmatcher.group(1);
 
        if (isxml(rawdata)) {
            // throw new illegalargumentexception("curl --data-raw content cant' be xml");
            return xml2json(rawdata);
        }
 
        try {
            return json.parseobject(rawdata);
        } catch (exception e) {
            throw new illegalargumentexception("curl --data-raw content is not a valid json");
        }
    }
 
    private boolean isjson(string jsonstr) {
        try {
            jsonobject.parseobject(jsonstr);
            return true;
        } catch (exception e) {
            return false;
        }
    }
 
    public static boolean isxml(string xmlstr) {
        try {
            documentbuilderfactory factory = documentbuilderfactory.newinstance();
            factory.setfeature(securityconstants.ddd, true);
            factory.setfeature(securityconstants.ege, false);
            factory.setfeature(securityconstants.epe, false);
 
            documentbuilder builder = factory.newdocumentbuilder();
            inputsource is = new inputsource(new stringreader(xmlstr));
            builder.parse(is);
            return true;
        } catch (exception e) {
            return false;
        }
    }
 
    private jsonobject xml2json(string xmlstr) {
        try {
            org.json.jsonobject orgjsonobj = xml.tojsonobject(xmlstr);
            string jsonstring = orgjsonobj.tostring();
            return json.parseobject(jsonstring);
        } catch (jsonexception e) {
            throw new linkconsoleexception("curl --data-raw content xml2json error", e);
        }
    }
 
    @override
    public void log(object... logparams) {
        logprinter.info("httpbodyhandler execute: body={}", logparams);
    }
}

curlpatternconstants

正则匹配常量定义:

/**
 * Pre-compiled regular expressions used by the curl handlers.
 *
 * <p>curl option names are case-sensitive ({@code -X}, {@code -H} and {@code -F} are different
 * options from {@code -x}, {@code -h} and {@code -f}), so the option letters in these patterns
 * must keep their case.
 */
public interface CurlPatternConstants {

    /**
     * Basic structure check: the script starts with {@code curl} followed by at least one token.
     * Leading whitespace is tolerated.
     */
    Pattern CURL_BASIC_STRUCTURE_PATTERN = Pattern.compile("^\\s*curl\\s+(\\S+)");

    /**
     * URL path. Group 1 is the URL up to, but not including, the query string.
     */
    Pattern URL_PATH_PATTERN =
            Pattern.compile("(?:\\s|^)(?:'|\")?(https?://[^?\\s'\"]*)(?:\\?[^\\s'\"]*)?(?:'|\")?(?:\\s|$)");

    /**
     * Full URL. Group 1 is the URL including its query string.
     */
    Pattern URL_PARAMS_PATTERN = Pattern.compile("(?:\\s|^)(?:'|\")?(https?://[^\\s'\"]+)(?:'|\")?(?:\\s|$)");

    /**
     * Explicit request method. Group 1 is the method name, matched case-insensitively and
     * optionally quoted. The option may appear anywhere in the command line.
     */
    Pattern HTTP_METHOD_PATTERN =
            Pattern.compile("(?:\\s|^)(?:-X|--request)\\s+['\"]?((?i:GET|POST|PUT|DELETE))\\b");

    /**
     * Options that make the request a POST when no method is given explicitly.
     */
    Pattern DEFAULT_HTTP_METHOD_PATTERN = Pattern.compile(
            ".*\\s(-d|--data|--data-binary|--data-raw|--data-urlencode|-F|--form)\\s.*");

    /**
     * Request header. Group 1 is the single-quoted {@code name: value} text.
     */
    Pattern CURL_HEADERS_PATTERN = Pattern.compile("(?:-H|--header)\\s+'(.*?:.*?)'");

    /**
     * {@code -d} / {@code --data} payload. Group 1 is a single-quoted payload, group 2 a
     * double-quoted one and group 3 an unquoted token.
     */
    Pattern DEFAULT_HTTP_BODY_PATTERN =
            Pattern.compile("(?:--data|-d)\\s+(?:'([^']*)'|\"([^\"]*)\"|(\\S+))", Pattern.DOTALL);

    /**
     * Shape check for a {@code k=v&k=v} payload; every pair needs a non-empty key and value.
     */
    Pattern DEFAULT_HTTP_BODY_PATTERN_KV = Pattern.compile("^([^=&]+=[^=&]+)(?:&[^=&]+=[^=&]+)*$", Pattern.DOTALL);

    /**
     * {@code --data-raw} payload. Group 1 is the single-quoted payload, which may span lines.
     */
    Pattern HTTP_ROW_BODY_PATTERN = Pattern.compile("--data-raw\\s+'(.+?)'", Pattern.DOTALL);

    /**
     * {@code --form} / {@code -F} item. Group 1 is set for the long option, group 2 for the short one.
     */
    Pattern HTTP_FROM_BODY_PATTERN = Pattern.compile("--form\\s+'(.*?)'|-F\\s+'(.*?)'");

    /**
     * {@code --data-urlencode} item. Group 1 is the single-quoted {@code key=value} text.
     */
    Pattern HTTP_URLENCODE_BODY_PATTERN = Pattern.compile("--data-urlencode\\s+'(.*?)'");

}

其他代码

public class securityconstants {
    public static string ddd = "http://apache.org/xml/features/disallow-doctype-decl";
    public static string ege = "http://xml.org/sax/features/external-general-entities";
    public static string epe = "http://xml.org/sax/features/external-parameter-entities";
    public static string led = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
}

以上就是使用java实现一个解析curl脚本小工具的详细内容,更多关于java实现解析curl工具的资料请关注代码网其它相关文章!

(0)

相关文章:

版权声明:本文内容由互联网用户贡献,该文观点仅代表作者本人。本站仅提供信息存储服务,不拥有所有权,不承担相关法律责任。 如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至 2386932994@qq.com 举报,一经查实将立刻删除。

发表评论

验证码:
Copyright © 2017-2025  代码网 保留所有权利. 粤ICP备2024248653号
站长QQ:2386932994 | 联系邮箱:2386932994@qq.com