+ "一": 1,
+ "壹": 1,
+ "幺": 1,
+ "二": 2,
+ "贰": 2,
+ "两": 2,
+ "三": 3,
+ "叁": 3,
+ "四": 4,
+ "肆": 4,
+ "五": 5,
+ "伍": 5,
+ "六": 6,
+ "陆": 6,
+ "七": 7,
+ "柒": 7,
+ "八": 8,
+ "捌": 8,
+ "九": 9,
+ "玖": 9,
+}
# Chinese unit character -> numeric value. Both the everyday form and the
# capital ("upper") form of ten / hundred / thousand are accepted.
UNIT_CN2AN = {
    "十": 10, "拾": 10,
    "百": 100, "佰": 100,
    "千": 1_000, "仟": 1_000,
    "万": 10_000,
    "亿": 100_000_000,
}

# Numeric value -> everyday ("low") Chinese unit character.
UNIT_LOW_AN2CN = {
    10: "十",
    100: "百",
    1_000: "千",
    10_000: "万",
    100_000_000: "亿",
}
# Digit (0-9) -> everyday ("low") Chinese numeral; the position of each
# character in the string is the digit it stands for.
NUMBER_LOW_AN2CN = dict(enumerate("零一二三四五六七八九"))

# Digit (0-9) -> capital ("upper") Chinese numeral, same positional layout.
NUMBER_UP_AN2CN = dict(enumerate("零壹贰叁肆伍陆柒捌玖"))
# Unit suffixes in ascending order, one row per group of four entries.
# NOTE(review): presumably indexed by digit position (index i <-> 10**i) by
# the An2Cn converter - confirm against its implementation.
UNIT_LOW_ORDER_AN2CN = [
    "", "十", "百", "千",
    "万", "十", "百", "千",
    "亿", "十", "百", "千",
    "万", "十", "百", "千",
]

# Same layout as above, using the capital ("upper") ten / hundred / thousand.
UNIT_UP_ORDER_AN2CN = [
    "", "拾", "佰", "仟",
    "万", "拾", "佰", "仟",
    "亿", "拾", "佰", "仟",
    "万", "拾", "佰", "仟",
]
# Canonical character -> string of every spelling accepted for it in the
# strict table (everyday form plus capital form where one exists).
STRICT_CN_NUMBER = {
    "零": "零",
    "一": "一壹", "二": "二贰", "三": "三叁",
    "四": "四肆", "五": "五伍", "六": "六陆",
    "七": "七柒", "八": "八捌", "九": "九玖",
    "十": "十拾", "百": "百佰", "千": "千仟",
    "万": "万",
    "亿": "亿",
}

# The normal table is the strict table plus extra spellings for zero, one,
# two and three. Overriding existing keys keeps the original key order.
NORMAL_CN_NUMBER = {
    **STRICT_CN_NUMBER,
    "零": "零〇",
    "一": "一壹幺",
    "二": "二贰两",
    "三": "三叁仨",
}
@@ -0,0 +1,29 @@ |
||
1 |
+import torbjorn as tbn |
|
2 |
+ |
|
3 |
+from .an2cn import An2Cn |
|
4 |
+from .cn2an import Cn2An |
|
5 |
+ |
|
6 |
# Converter instances are created once at import time, so the loops below
# only exercise the conversion calls themselves.
ac, ca = An2Cn(), Cn2An()

# A 16-digit sample value and the Chinese text it is expected to round-trip
# with; each benchmark asserts one direction of that round trip.
an = 9_876_543_298_765_432
cn = "九千八百七十六万五千四百三十二亿九千八百七十六万五千四百三十二"
|
11 |
+ |
|
12 |
+ |
|
13 |
@tbn.run_time
def run_cn2an_ten_thousand_times() -> None:
    """Convert the sample Chinese numeral ``cn`` to a number 10000 times.

    Every result is asserted equal to ``an``.
    NOTE(review): ``tbn.run_time`` presumably reports the elapsed time of the
    call - confirm in the torbjorn package.
    """
    for _ in range(10000):
        assert ca.cn2an(cn) == an
|
18 |
+ |
|
19 |
+ |
|
20 |
@tbn.run_time
def run_an2cn_ten_thousand_times() -> None:
    """Convert the sample number ``an`` to Chinese text 10000 times.

    Every result is asserted equal to ``cn``.
    NOTE(review): ``tbn.run_time`` presumably reports the elapsed time of the
    call - confirm in the torbjorn package.
    """
    for _ in range(10000):
        assert ac.an2cn(an) == cn
|
25 |
+ |
|
26 |
+ |
|
27 |
if __name__ == "__main__":
    # Run both directions, Chinese -> Arabic first, when used as a script.
    for benchmark in (run_cn2an_ten_thousand_times, run_an2cn_ten_thousand_times):
        benchmark()
@@ -0,0 +1,104 @@ |
||
1 |
+import re |
|
2 |
+from warnings import warn |
|
3 |
+ |
|
4 |
+from .cn2an import Cn2An |
|
5 |
+from .an2cn import An2Cn |
|
6 |
+from .conf import UNIT_CN2AN |
|
7 |
+ |
|
8 |
+ |
|
9 |
class Transform:
    """Rewrite the numbers embedded in a sentence between Chinese and Arabic form.

    The sentence is processed by an ordered series of regex passes (date,
    fraction, percent, celsius, plain number). The more specific passes run
    first so that the plain-number pass only sees what is left over.

    Attributes set by ``__init__``:
        all_num: the ten Chinese digit characters recognised in sentences.
        all_unit: every unit character that is a key of ``UNIT_CN2AN``.
        cn2an / an2cn: bound conversion methods of ``Cn2An`` / ``An2Cn``.
        cn_pattern: regex source for a Chinese numeral (optional "负" sign,
            optional "点" decimal part).
        smart_cn_pattern: regex source for Arabic digits followed by Chinese
            unit characters, e.g. "2.5亿".
    """

    def __init__(self) -> None:
        self.all_num = "零一二三四五六七八九"
        self.all_unit = "".join(UNIT_CN2AN)
        self.cn2an = Cn2An().cn2an
        self.an2cn = An2Cn().an2cn
        cn_chars = f"[{self.all_num}{self.all_unit}]"
        self.cn_pattern = f"负?({cn_chars}+点)?{cn_chars}+"
        # The decimal point is escaped: an unescaped "." would accept any
        # character between the two digit runs.
        self.smart_cn_pattern = fr"-?([0-9]+\.)?[0-9]+[{self.all_unit}]+"

    def transform(self, inputs: str, method: str = "cn2an") -> str:
        """Convert every number found in ``inputs``.

        Args:
            inputs: the sentence to rewrite.
            method: ``"cn2an"`` (Chinese numerals -> Arabic digits) or
                ``"an2cn"`` (Arabic digits -> Chinese numerals).

        Returns:
            The sentence with the recognised numbers converted. A fragment
            whose conversion fails is left unchanged and a warning is issued.

        Raises:
            ValueError: if ``method`` is neither ``"cn2an"`` nor ``"an2cn"``.
        """
        if method == "cn2an":
            # Colloquial forms are normalised before any pattern is applied.
            inputs = inputs.replace("廿", "二十").replace("半", "0.5").replace("两", "2")
            cn = self.cn_pattern
            num = self.all_num
            date_pattern = (
                fr"((({self.smart_cn_pattern})|({cn}))年)?"
                fr"([{num}十]+月)?([{num}十]+日)?"
            )
            passes = (
                (date_pattern, "date"),
                (fr"{cn}分之{cn}", "fraction"),
                (fr"百分之{cn}", "percent"),
                (fr"{cn}摄氏度", "celsius"),
                (cn, "number"),
            )
        elif method == "an2cn":
            signed_decimal = r"-?(\d+\.)?\d+"
            passes = (
                (r"(\d{2,4}年)?(\d{1,2}月)?(\d{1,2}日)?", "date"),
                (r"\d+/\d+", "fraction"),
                (signed_decimal + "%", "percent"),
                # Accepts sign and decimals like the percent pass, so that
                # "-3.5℃" is converted as one temperature instead of being
                # split into "5℃" and a stray "-3".
                (signed_decimal + "℃", "celsius"),
                (signed_decimal, "number"),
            )
        else:
            raise ValueError(f"error method: {method}, only support 'cn2an' and 'an2cn'!")

        output = inputs
        for pattern, sub_mode in passes:
            output = re.sub(
                pattern,
                lambda m, sub_mode=sub_mode: self.__sub_util(m.group(), method, sub_mode),
                output,
            )
        return output

    def __sub_util(self, inputs: str, method: str = "cn2an", sub_mode: str = "number") -> str:
        """Convert one regex match; on any failure warn and return it unchanged.

        Args:
            inputs: the matched text (may be empty, because every group of
                the date patterns is optional).
            method: ``"cn2an"``; any other value is handled as an2cn.
            sub_mode: one of ``"date"``, ``"fraction"``, ``"percent"``,
                ``"celsius"``, ``"number"``.
        """
        if not inputs:
            # Empty match: nothing to convert.
            return inputs
        try:
            if method == "cn2an":
                return self._piece_to_arabic(inputs, sub_mode)
            return self._piece_to_chinese(inputs, sub_mode)
        except Exception as e:
            # Deliberate best effort: one unconvertible fragment must not
            # abort the rewrite of the whole sentence.
            warn(str(e))
            return inputs

    def _piece_to_arabic(self, piece: str, sub_mode: str) -> str:
        """Convert one matched Chinese fragment to its Arabic form."""

        def to_arabic(match) -> str:
            return str(self.cn2an(match.group(), "smart"))

        if sub_mode == "date":
            return re.sub(fr"(({self.smart_cn_pattern})|({self.cn_pattern}))", to_arabic, piece)
        if sub_mode == "fraction":
            if piece[0] == "百":
                # The fraction pattern also matches text starting with
                # "百分之"; that is left untouched for the percent pass.
                return piece
            converted = re.sub(self.cn_pattern, to_arabic, piece)
            # Chinese order is "<denominator>分之<numerator>".
            denominator, numerator = converted.split("分之")
            return f"{numerator}/{denominator}"
        if sub_mode == "percent":
            converted = re.sub(f"(?<=百分之){self.cn_pattern}", to_arabic, piece)
            return converted.replace("百分之", "") + "%"
        if sub_mode == "celsius":
            converted = re.sub(f"{self.cn_pattern}(?=摄氏度)", to_arabic, piece)
            return converted.replace("摄氏度", "℃")
        if sub_mode == "number":
            return str(self.cn2an(piece, "smart"))
        raise ValueError(f"error sub_mode: {sub_mode} !")

    def _piece_to_chinese(self, piece: str, sub_mode: str) -> str:
        """Convert one matched Arabic fragment to its Chinese form."""

        def to_low(match) -> str:
            return self.an2cn(match.group(), "low")

        if sub_mode == "date":
            # The year is converted in "direct" mode; month and day in "low".
            piece = re.sub(r"\d+(?=年)", lambda m: self.an2cn(m.group(), "direct"), piece)
            return re.sub(r"\d+", to_low, piece)
        if sub_mode == "fraction":
            converted = re.sub(r"\d+", to_low, piece)
            numerator, denominator = converted.split("/")
            return f"{denominator}分之{numerator}"
        if sub_mode == "celsius":
            # piece[:-1] drops the trailing "℃".
            return self.an2cn(piece[:-1], "low") + "摄氏度"
        if sub_mode == "percent":
            # piece[:-1] drops the trailing "%".
            return "百分之" + self.an2cn(piece[:-1], "low")
        if sub_mode == "number":
            return self.an2cn(piece, "low")
        raise ValueError(f"error sub_mode: {sub_mode} !")
@@ -0,0 +1,40 @@ |
||
1 |
+import unittest |
|
2 |
+ |
|
3 |
+from .transform import Transform |
|
4 |
+ |
|
5 |
+ |
|
6 |
class TransformTest(unittest.TestCase):
    """Sentence-level checks for ``Transform.transform``."""

    def setUp(self) -> None:
        # Arabic sentence -> Chinese sentence. Every pair is checked in both
        # directions (an2cn on the key, cn2an on the value).
        self.strict_data_dict = {
            "小王捡了100块钱": "小王捡了一百块钱",
            "用户增长最快的3个城市": "用户增长最快的三个城市",
            "小王的生日是2001年3月4日": "小王的生日是二零零一年三月四日",
            "小王的生日是2012年12月12日": "小王的生日是二零一二年十二月十二日",
            "今天股价上涨了8%": "今天股价上涨了百分之八",
            "第2天股价下降了-3.8%": "第二天股价下降了百分之负三点八",
            "抛出去的硬币为正面的概率是1/2": "抛出去的硬币为正面的概率是二分之一",
            "现在室内温度为39℃,很热啊!": "现在室内温度为三十九摄氏度,很热啊!",
            "创业板指9月9日早盘低开1.57%": "创业板指九月九日早盘低开百分之一点五七",
        }

        # Input sentence -> expected cn2an output. These are checked in the
        # cn2an direction only.
        self.smart_data_dict = {
            "约2.5亿年~6500万年": "约250000000年~65000000年",
            "廿二日,日出东方": "22日,日出东方",
            "大陆": "大陆",
            "半斤": "0.5斤",
            "两个": "2个",
        }

        self.t = Transform()

    def test_transform(self) -> None:
        """Check every fixture; subTest reports each failing sentence separately."""
        for arabic, chinese in self.strict_data_dict.items():
            with self.subTest(method="an2cn", sentence=arabic):
                self.assertEqual(self.t.transform(arabic, "an2cn"), chinese)
            with self.subTest(method="cn2an", sentence=chinese):
                self.assertEqual(self.t.transform(chinese, "cn2an"), arabic)

        for sentence, expected in self.smart_data_dict.items():
            with self.subTest(method="cn2an", sentence=sentence):
                self.assertEqual(self.t.transform(sentence, "cn2an"), expected)
|
37 |
+ |
|
38 |
+ |
|
39 |
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()