一、需求
根据OCR识别出的身份证地址或者用户输入的地址,智能识别出用户的省、市、区/县和详细地址信息。
二、思路
解析给到接口的完整地址fulladdress,逐个匹配省province、市city、区/县area、详细地址detail,然后将 province+city+area+detail 返回即可。
ps:需要考虑一些情况:
1、四个直辖市(北京市、上海市、天津市、重庆市)。直辖市的完整地址fulladdress只有省、县/区
2、完整地址fulladdress只有省、区/县
3、市与区/县重名
4、区/县与区/县重名
三、实现代码
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
/**
 * Utility for splitting a Chinese ID-card (or user-entered) address into
 * province, city, district/county and the remaining detailed address.
 *
 * <p>Two independent strategies are provided:
 * <ul>
 *   <li>{@link #spliceDetailedAddress(String)} (and the two convenience
 *       getters built on it) cuts the address at the earliest administrative
 *       suffix found for each level;</li>
 *   <li>{@link #addressResolution(String)} matches the whole address against
 *       a single regular expression with named groups.</li>
 * </ul>
 * Both are purely textual heuristics: no gazetteer lookup is performed, so a
 * suffix character that happens to appear inside a street name can be taken
 * for an administrative boundary.
 */
public class AnalysisIdCardUtil {

    /** Suffixes that may terminate the province-level part (province, municipality, autonomous region). */
    private static final String[] PROVINCE_SUFFIXES = {"省", "市", "区"};

    /** Suffixes that may terminate the city-level part. */
    private static final String[] CITY_SUFFIXES = {"县", "自治州", "市辖区", "市", "区", "地区", "盟"};

    /** Suffixes that may terminate the district/county-level part. */
    private static final String[] COUNTY_SUFFIXES = {"县", "旗", "海域", "市", "区", "岛"};

    /** Suffix tables in the order the levels are cut off the front of the address. */
    private static final String[][] LEVEL_SUFFIXES = {PROVINCE_SUFFIXES, CITY_SUFFIXES, COUNTY_SUFFIXES};

    /** Compiled once; {@link Pattern} instances are immutable and safe to share. */
    private static final Pattern ADDRESS_PATTERN = Pattern.compile(
            "(?<province>[^省]+自治区|.*?省|.*?行政区|.*?市)(?<city>[^市]+自治州|.*?地区|.*?行政单位|.+盟|市辖区|.*?市|.*?县)(?<area>[^县]+县|.+区|.+市|.+旗|.+海域|.+岛)?(?<town>[^区]+区|.+镇)?(?<detail>.*)");

    /**
     * Returns the detailed (street-level) part of an address, i.e. whatever
     * remains after province, city and district/county have been cut off.
     *
     * @param fullAddress full ID-card address; may be {@code null}
     * @return the remaining detail text, or {@code null} when
     *         {@code fullAddress} is {@code null}
     */
    public static String getDetailedAddress(String fullAddress) {
        String[] parts = spliceDetailedAddress(fullAddress);
        return parts[parts.length - 1];
    }

    /**
     * Returns province + city + district/county concatenated, e.g.
     * {@code 安徽省合肥市包河区}. Levels that were not recognised are skipped.
     *
     * @param fullAddress full ID-card address; may be {@code null}
     * @return the concatenated region prefix; empty when nothing was recognised
     */
    public static String getPcaAddress(String fullAddress) {
        String[] parts = spliceDetailedAddress(fullAddress);
        StringBuilder region = new StringBuilder();
        // The last slot is the detail text, so it is deliberately excluded.
        for (int i = 0; i < parts.length - 1; i++) {
            String part = parts[i];
            if (part != null && !part.isEmpty()) {
                region.append(part);
            }
        }
        return region.toString();
    }

    /**
     * Splits a full address into {@code [province, city, county, detail]},
     * e.g. {@code [安徽省, 合肥市, 包河区, 幸福大街001号]}.
     *
     * <p>Each level is cut off the front of the remaining text at the earliest
     * matching suffix for that level. A level with no match is left
     * {@code null} and the text is passed unchanged to the next level, so a
     * municipality address such as {@code 北京市朝阳区...} yields the district
     * in the city slot and {@code null} in the county slot.
     *
     * @param fullAddress full ID-card address; may be {@code null}
     * @return a new 4-element array; all elements are {@code null} when
     *         {@code fullAddress} is {@code null}
     */
    public static String[] spliceDetailedAddress(String fullAddress) {
        String[] parts = new String[4];
        if (fullAddress == null) {
            // Explicit guard instead of relying on a caught NullPointerException.
            return parts;
        }
        String remaining = fullAddress;
        for (int level = 0; level < LEVEL_SUFFIXES.length; level++) {
            int end = earliestSuffixEnd(remaining, LEVEL_SUFFIXES[level]);
            if (end > -1) {
                parts[level] = remaining.substring(0, end + 1);
                remaining = remaining.substring(end + 1);
            }
        }
        parts[3] = remaining;
        return parts;
    }

    /**
     * Finds, among the first occurrences of the given suffixes, the one that
     * ends earliest and returns the index of its last character.
     *
     * <p>A match ending at index 0 (a bare one-character suffix with no name
     * in front of it) is ignored, so "not found" is always reported as
     * {@code -1} regardless of which suffix was probed last.
     *
     * @param address  text to scan; must not be {@code null}
     * @param suffixes candidate administrative suffixes
     * @return index of the last character of the earliest-ending match, or
     *         {@code -1} when no usable match exists
     */
    private static int earliestSuffixEnd(String address, String[] suffixes) {
        int best = -1;
        for (String suffix : suffixes) {
            int start = address.indexOf(suffix);
            if (start < 0) {
                continue;
            }
            int end = start + suffix.length() - 1;
            if (end > 0 && (best < 0 || end < best)) {
                best = end;
            }
        }
        return best;
    }

    /**
     * Extracts province, city, area, town and detail from an address using a
     * single regular expression with named groups.
     *
     * <p>Only the first match in the input is used. Optional groups that did
     * not participate in the match are reported as empty strings; every value
     * is trimmed.
     *
     * @param fullAddress full ID-card address; may be {@code null}
     * @return an insertion-ordered map with the keys {@code province},
     *         {@code city}, {@code area}, {@code town} and {@code detail};
     *         empty when the input is {@code null} or does not match
     */
    public static Map<String, String> addressResolution(String fullAddress) {
        Map<String, String> result = new LinkedHashMap<>();
        if (fullAddress == null) {
            return result;
        }
        Matcher matcher = ADDRESS_PATTERN.matcher(fullAddress);
        if (matcher.find()) {
            for (String group : new String[] {"province", "city", "area", "town", "detail"}) {
                result.put(group, trimToEmpty(matcher.group(group)));
            }
        }
        return result;
    }

    /** Returns the trimmed value, or an empty string for {@code null}/empty input. */
    private static String trimToEmpty(String value) {
        return (value == null || value.isEmpty()) ? "" : value.trim();
    }

    /**
     * Small demonstration that prints the results of every public method for
     * a few sample addresses.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String address1 = "上海市上海市浦东新区世纪大道xx号上海中心大厦xx楼a座";
        String address2 = "内蒙古自治区呼伦贝尔市鄂温克族自治旗额尔古纳市阿尔山北路xxx号蒙古民族文化博物馆x楼xx展厅";
        String address3 = "广东省广州市番禺区沙湾镇大巷涌路xxx号";
        System.out.println("详细地址1:" + getDetailedAddress(address1));
        System.out.println("详细地址2:" + getDetailedAddress(address2));
        System.out.println("省市区地址拼接1:" + getPcaAddress(address1));
        System.out.println("省市区地址拼接2:" + getPcaAddress(address2));
        System.out.println("地址数组1:" + Arrays.toString(spliceDetailedAddress(address1)));
        System.out.println("地址数组2:" + Arrays.toString(spliceDetailedAddress(address2)));
        System.out.println("地址提取省市区:" + addressResolution(address2));
        System.out.println("地址提取省市区:" + addressResolution(address3));
    }
}
// Test results:

到此这篇关于Java实现根据地址智能识别省市区县的文章就介绍到这了,更多相关Java地址识别省市区县内容请搜索代码网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持代码网!
发表评论