chore: release updated anxinyan version

This commit is contained in:
wushumin
2026-05-22 21:13:52 +08:00
parent 7e86e2a5ec
commit 78098851f9
29 changed files with 1949 additions and 184 deletions

View File

@@ -0,0 +1,213 @@
import regionSource from "../static/regions/pca.json";
/**
 * One node of the region tree loaded from the bundled region JSON.
 * The same shape is used at every nesting level of the tree.
 */
type RegionNode = {
// Region code as stored in the data file; not read by the matching helpers visible in this file.
code: string;
// Full region name; the address matcher compares this (and shortened forms of it) against pasted text.
name: string;
// Optional nested regions one level below this node.
children?: RegionNode[];
};
/**
 * Structured return address produced by a successful recognition.
 */
export type RecognizedReturnAddress = {
// Recipient name taken from a line carrying one of the name labels.
consignee: string;
// Mobile number as extracted by the mobile-number pattern.
mobile: string;
// Full province name as spelled in the region data.
province: string;
// Full city name as spelled in the region data.
city: string;
// Full district name as spelled in the region data.
district: string;
// Remainder of the address text after the matched district, trimmed.
detail_address: string;
};
/**
 * Outcome of a recognition attempt.
 * In this file `address` is set only together with `ok: true`,
 * and `message` only together with `ok: false`.
 */
export type RecognizeReturnAddressResult = {
// True when every required field was recognized.
ok: boolean;
// The parsed address; present on success.
address?: RecognizedReturnAddress;
// User-facing explanation of what could not be recognized; present on failure.
message?: string;
};
// Region hierarchy from the imported JSON, viewed as RegionNode[].
// NOTE(review): the cast is unchecked — the JSON shape is assumed, not validated here.
const regionTree = regionSource as RegionNode[];
// Field labels that introduce the recipient name in pasted text.
const nameLabels = ["收货人", "收件人", "姓名", "联系人", "取件人"];
// Field labels that introduce the phone number.
const mobileLabels = ["收货电话", "联系电话", "手机号", "手机号码", "手机", "电话"];
// Field labels that introduce the address.
const addressLabels = ["收货地址", "收件地址", "寄回地址", "地址"];
// Every known label, used to detect where the next field starts.
const allLabels = [...nameLabels, ...mobileLabels, ...addressLabels];
/**
 * Splits pasted text into its non-empty lines.
 *
 * Both carriage returns and line feeds act as separators; every piece is
 * trimmed and pieces that end up empty are dropped.
 *
 * @param raw Text exactly as pasted by the user.
 * @returns Trimmed, non-empty lines in their original order.
 */
function normalizeLines(raw: string) {
  const kept: string[] = [];
  for (const piece of raw.split(/[\r\n]/)) {
    const trimmed = piece.trim();
    if (trimmed) {
      kept.push(trimmed);
    }
  }
  return kept;
}
/**
 * Tries to read `line` as "<label><optional colon><value>".
 *
 * The label must sit at the start of the line (leading whitespace allowed).
 * The separator may be an ASCII colon or a full-width colon (U+FF1A), which
 * is what Chinese input methods normally produce.
 *
 * @param line A single trimmed line of pasted text.
 * @param labels Candidate labels to look for.
 * @returns The matched label and the trimmed text after it (possibly empty),
 *          or `null` when the line starts with none of the labels.
 */
function labelMatch(line: string, labels: string[]) {
  // Longest label first: "手机号码" must win over its own prefix "手机号",
  // otherwise the leftover "码" leaks into the value.
  const candidates = [...labels].sort((a, b) => b.length - a.length);
  for (const label of candidates) {
    // The label is spliced into a pattern, so neutralise regex metacharacters.
    const escaped = label.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const pattern = new RegExp(`^\\s*${escaped}\\s*[:\\uFF1A]?\\s*(.*)$`);
    const match = line.match(pattern);
    if (match) {
      return { label, value: (match[1] ?? "").trim() };
    }
  }
  return null;
}

/**
 * Tells whether `line` consists of nothing but a known label, optionally
 * followed by an ASCII or full-width colon. Whitespace is ignored.
 *
 * @param line A single line of pasted text.
 * @returns `true` when the line is a bare label with no value.
 */
function isKnownLabel(line: string) {
  const compact = line.replace(/\s+/g, "");
  return allLabels.some(
    (label) => compact === label || compact === `${label}:` || compact === `${label}\uFF1A`,
  );
}
/**
 * Finds the value belonging to the first line that carries one of `labels`.
 *
 * If that line has text after the label, the text is the value. Otherwise
 * the value is taken from the lines that follow: a single line by default,
 * or every line up to the next field when `block` is set. A following line
 * that starts a new field ends the collection.
 *
 * @param lines Non-empty, trimmed lines of the pasted text.
 * @param labels Labels that identify the wanted field.
 * @param block When true, gather all continuation lines and join them
 *              without a separator; when false, take one line only.
 * @returns The trimmed value, or "" when no label line exists or nothing
 *          follows a bare label.
 */
function extractLabeledValue(lines: string[], labels: string[], block = false) {
  for (const [position, line] of lines.entries()) {
    const hit = labelMatch(line, labels);
    if (hit === null) {
      continue;
    }
    if (hit.value) {
      return hit.value;
    }

    // Bare label: the value lives on the following line(s).
    const collected: string[] = [];
    for (const candidate of lines.slice(position + 1)) {
      const nextField = labelMatch(candidate, allLabels);
      const startsNewField =
        nextField !== null && (isKnownLabel(candidate) || Boolean(nextField.value));
      if (startsNewField) {
        break;
      }
      collected.push(candidate);
      if (!block) {
        break;
      }
    }
    const separator = block ? "" : " ";
    return collected.join(separator).trim();
  }
  return "";
}
/**
 * Pulls an 11-digit mobile number (1, then 3-9, then nine digits) out of text.
 *
 * The text is searched as-is first. If that fails, every non-digit character
 * is removed and the search is repeated, which recovers numbers written with
 * spaces or dashes between digit groups.
 *
 * @param value Text that may contain a mobile number.
 * @returns The first number found, or "" when there is none.
 */
function normalizeMobile(value: string) {
  const mobilePattern = /1[3-9]\d{9}/;

  const inline = mobilePattern.exec(value);
  if (inline !== null) {
    return inline[0];
  }

  const digitsOnly = value.replace(/\D+/g, "");
  const rebuilt = mobilePattern.exec(digitsOnly);
  return rebuilt === null ? "" : rebuilt[0];
}
/**
 * Removes whitespace and leading noise from an address so that it starts
 * with the province or city name.
 *
 * Stripped from the front: country designations, one address label, and the
 * colon after that label. Both the ASCII colon and the full-width colon
 * (U+FF1A) are accepted. The country designation is stripped again after
 * the label so that "label + country + address" is cleaned as well.
 *
 * @param value Raw address text.
 * @returns The compacted address without the recognised prefixes.
 */
function stripKnownAddressPrefixes(value: string) {
  const countryPrefix = /^(中国大陆|中华人民共和国|中国|大陆)+/;
  const labelPrefix = /^(收货地址|收件地址|寄回地址|地址)[:\uFF1A]?/;
  return value
    .replace(/\s+/g, "")
    .replace(countryPrefix, "")
    .replace(labelPrefix, "")
    .replace(countryPrefix, "");
}
/**
 * Lists the spellings under which a region name may appear in an address:
 * the full name plus the name with each matching administrative suffix
 * removed (for example "北京市" and "北京").
 *
 * A shortened form is kept only when at least two characters remain. A
 * one-character stem such as "赵" from "赵县" would prefix-match unrelated
 * text and produce a wrong region instead of a clean "not recognised".
 *
 * @param name Full region name.
 * @returns Distinct spellings, longest first, so the most specific form is
 *          tried before its shortened variants.
 */
function aliases(name: string) {
  const suffixes = ["特别行政区", "壮族自治区", "回族自治区", "维吾尔自治区", "自治区", "自治州", "自治县", "地区", "省", "市", "区", "县", "旗", "盟"];
  const spellings = new Set<string>([name]);
  for (const suffix of suffixes) {
    if (!name.endsWith(suffix)) {
      continue;
    }
    const stem = name.slice(0, -suffix.length);
    if (stem.length >= 2) {
      spellings.add(stem);
    }
  }
  return Array.from(spellings).sort((a, b) => b.length - a.length);
}
/**
 * Strips the first of `names` that `text` starts with.
 *
 * Candidates are tried in the order given and empty candidates are ignored.
 *
 * @param text Text to examine.
 * @param names Candidate prefixes, in priority order.
 * @returns The length of the prefix that was removed and the remaining
 *          text, or `null` when no candidate is a prefix of `text`.
 */
function consumePrefix(text: string, names: string[]) {
  const hit = names.find((candidate) => Boolean(candidate) && text.startsWith(candidate));
  if (hit === undefined) {
    return null;
  }
  return { consumed: hit.length, rest: text.slice(hit.length) };
}
/**
 * Tells whether a city node stands for the same place as its province node,
 * i.e. the two share their full name or at least one alias.
 *
 * @param province Province-level node.
 * @param city City-level node under that province.
 * @returns `true` when the names or any of their aliases coincide.
 */
function isDirectCity(province: RegionNode, city: RegionNode) {
  if (province.name === city.name) {
    return true;
  }
  const cityAliases = aliases(city.name);
  return aliases(province.name).some((alias) => cityAliases.includes(alias));
}
/**
 * Looks for a district of `city` whose name, or an alias of it, opens `text`.
 *
 * Districts are tried in data order and the first hit wins.
 *
 * @param city City node whose children are the candidate districts.
 * @param text Address text remaining after the city part.
 * @returns The matched district node and the text following it, or `null`
 *          when no district matches.
 */
function matchDistrict(city: RegionNode, text: string) {
  for (const district of city.children || []) {
    const remainder = consumePrefix(text, aliases(district.name))?.rest;
    if (remainder !== undefined) {
      return { district, detail: remainder };
    }
  }
  return null;
}
/**
 * Resolves the province / city / district that an address starts with.
 *
 * Two passes are made over the region tree:
 *  1. province first — the address opens with a province name, followed by
 *     a city name, or directly by a district when the city node stands for
 *     the province itself;
 *  2. city first — the address omits the province and opens with a city name.
 *
 * @param addressText Address text, possibly with label or country prefixes.
 * @returns Full region names plus the text after the district, or `null`
 *          when no province/city/district chain matches.
 */
function matchRegion(addressText: string) {
  const address = stripKnownAddressPrefixes(addressText);
  if (!address) {
    return null;
  }

  // Shared tail of both passes: find the district and build the result.
  const resolve = (province: RegionNode, city: RegionNode, remainder: string) => {
    const found = matchDistrict(city, remainder);
    if (!found) {
      return null;
    }
    return {
      province: province.name,
      city: city.name,
      district: found.district.name,
      detail_address: found.detail,
    };
  };

  // Pass 1: the address starts with a province.
  for (const province of regionTree) {
    const afterProvince = consumePrefix(address, aliases(province.name));
    if (!afterProvince) {
      continue;
    }
    for (const city of province.children || []) {
      const afterCity = consumePrefix(afterProvince.rest, aliases(city.name));
      let remainder = "";
      if (afterCity) {
        remainder = afterCity.rest;
      } else if (isDirectCity(province, city)) {
        // The city is not spelled out separately; continue right after the province.
        remainder = afterProvince.rest;
      }
      if (!remainder) {
        continue;
      }
      const resolved = resolve(province, city, remainder);
      if (resolved) {
        return resolved;
      }
    }
  }

  // Pass 2: the province is omitted and the address starts with a city.
  for (const province of regionTree) {
    for (const city of province.children || []) {
      const afterCity = consumePrefix(address, aliases(city.name));
      if (!afterCity) {
        continue;
      }
      const resolved = resolve(province, city, afterCity.rest);
      if (resolved) {
        return resolved;
      }
    }
  }

  return null;
}
/**
 * Picks the most likely address line when no address label was found.
 *
 * Each line is reduced to its value if it carries a known label. Lines that
 * equal the consignee or contain a phone number are discarded, and the
 * longest remaining line wins (the earliest one on a tie).
 *
 * @param lines Non-empty, trimmed lines of the pasted text.
 * @param consignee Recipient name already extracted, or "".
 * @param mobile Mobile number already extracted, or "".
 * @returns The chosen line, or "" when no line qualifies.
 */
function fallbackAddressLine(lines: string[], consignee: string, mobile: string) {
  const candidates = lines
    .map((line) => labelMatch(line, allLabels)?.value || line)
    .filter((line) => {
      if (!line || line === consignee) {
        return false;
      }
      // `includes("")` is true for every string, so compare against the
      // mobile number only when one was actually found.
      if (mobile && line.includes(mobile)) {
        return false;
      }
      return !normalizeMobile(line);
    });

  // Strict ">" keeps the earliest of equally long lines.
  return candidates.reduce((longest, line) => (line.length > longest.length ? line : longest), "");
}
/**
 * Parses pasted free text into a structured return address.
 *
 * The consignee comes from a name-labelled line. The mobile number comes
 * from a phone-labelled line, or from the whole text when no such value
 * exists. The address comes from an address-labelled block, or failing that
 * from the best unlabelled line. The address is then resolved against the
 * region tree.
 *
 * Checks run in a fixed order — empty input, consignee, mobile, region,
 * detail address — and the first one that fails determines the message.
 *
 * @param raw Text exactly as pasted by the user.
 * @returns `{ ok: true, address }` on success, otherwise
 *          `{ ok: false, message }` describing what is missing.
 */
export function recognizeReturnAddress(raw: string): RecognizeReturnAddressResult {
  const fail = (message: string): RecognizeReturnAddressResult => ({ ok: false, message });

  const lines = normalizeLines(raw);
  if (lines.length === 0) {
    return fail("请先粘贴寄回地址信息");
  }

  const consignee = extractLabeledValue(lines, nameLabels).trim();
  if (!consignee) {
    return fail("未识别到收件人,请检查文本中是否包含收货人或收件人");
  }

  const mobileSource = extractLabeledValue(lines, mobileLabels) || raw;
  const mobile = normalizeMobile(mobileSource);
  if (!mobile) {
    return fail("未识别到有效手机号,请检查文本中的收货电话");
  }

  const labeledAddress = extractLabeledValue(lines, addressLabels, true);
  const addressText = labeledAddress || fallbackAddressLine(lines, consignee, mobile);
  const region = matchRegion(addressText);
  if (!region) {
    return fail("未识别到省市区,请检查地址是否包含城市和区县");
  }

  const detailAddress = region.detail_address.trim();
  if (!detailAddress) {
    return fail("未识别到详细地址,请检查区县后的街道门牌信息");
  }

  return {
    ok: true,
    address: {
      consignee,
      mobile,
      province: region.province,
      city: region.city,
      district: region.district,
      detail_address: detailAddress,
    },
  };
}