import pandas as pd
from typing import List, Optional, Dict, Tuple
import os

class AreaCodeTool:
    """Look up administrative-division codes by full or partial region name.

    The data comes from a CSV file with at least a ``name`` column (the full
    region name, e.g. province + city + district concatenated) and a ``code``
    column. Codes are always handled as strings.

    Attributes:
        df: The loaded table, with ``code`` coerced to ``str``.
        full_name_map: Stripped full name -> code.
        second_map: Text after the first '省' in a name -> code.
        third_map: Trailing district-level text of a name -> code.
    """

    def __init__(self, csv_path: Optional[str] = None):
        """Load the area-code table and build the name lookup maps.

        Args:
            csv_path: Path to the CSV file. When ``None``, ``area_code.csv``
                located next to this source file is used.
        """
        if csv_path is None:
            # Default to the data file shipped alongside this module.
            current_dir = os.path.dirname(os.path.abspath(__file__))
            csv_path = os.path.join(current_dir, "area_code.csv")

        # Read codes as text so they are never parsed as numbers.
        self.df = pd.read_csv(csv_path, dtype={'code': str})
        self.df['code'] = self.df['code'].astype(str)
        self._build_name_maps()

    def _build_name_maps(self):
        """Build the name -> code dictionaries from ``self.df``.

        Rows whose name is missing (not a string) or blank are skipped. Names
        are stripped of surrounding whitespace before being used as keys, so
        all three maps agree on the same normalised names. When several rows
        produce the same key, the last row wins.
        """
        self.full_name_map = {}
        # Second-level map: everything after the province marker.
        self.second_map = {}
        # Third-level map: the district-level tail of the name.
        self.third_map = {}

        for raw_name, code in zip(self.df['name'], self.df['code']):
            # Missing cells come through as NaN/None; they cannot be indexed.
            if not isinstance(raw_name, str):
                continue
            name = raw_name.strip()
            if not name:
                continue

            self.full_name_map[name] = code

            if '省' in name:
                remainder = name.partition('省')[2]
                if not remainder:
                    # Province-level row: nothing below the province.
                    continue
                self.second_map[remainder] = code
                # Split only at the FIRST marker so a county-level city keeps
                # its own suffix (e.g. "郑州市新密市" -> "新密市").
                for marker in ('市', '自治州'):
                    district = remainder.partition(marker)[2]
                    if district:
                        self.third_map[district] = code
            else:
                # Names without a province marker (e.g. municipalities):
                # whatever follows the first '市' is the district-level part.
                district = name.partition('市')[2]
                if district:
                    self.third_map[district] = code

    def find_code(self, area_name: str) -> List[Tuple[str, str]]:
        """Find the code for a region name.

        Lookup order: exact full name, second-level name (only for queries
        ending in '市'), third-level name, then a fuzzy match in which the
        query's characters must appear in order within a full name. The
        search stops at the first hit.

        Args:
            area_name: Full or partial region name. Surrounding whitespace
                is ignored.

        Returns:
            A list with at most one ``(name, code)`` tuple, where ``name`` is
            the (stripped) query text, including for fuzzy matches. The list
            is empty when nothing matches or the query is empty.
        """
        # An empty query is a subsequence of every name; without this guard
        # it would "match" the first record in the table.
        if not area_name:
            return []
        area_name = area_name.strip()
        if not area_name:
            return []

        # Exact full-name match.
        if area_name in self.full_name_map:
            return [(area_name, self.full_name_map[area_name])]

        # Second-level match.
        if area_name.endswith('市') and area_name in self.second_map:
            return [(area_name, self.second_map[area_name])]

        # Third-level match.
        if area_name in self.third_map:
            return [(area_name, self.third_map[area_name])]

        # Fuzzy match: first full name containing the query as a subsequence.
        for name, code in self.full_name_map.items():
            if self.is_subsequence(name, area_name):
                return [(area_name, code)]

        return []

    def get_full_name(self, code: str) -> Optional[str]:
        """Return the full region name stored for a code.

        Args:
            code: Region code. Non-string values are converted with ``str``
                before comparison, since codes are stored as text.

        Returns:
            The ``name`` of the first row whose code matches, or ``None``
            when no row matches.
        """
        matches = self.df.loc[self.df['code'] == str(code), 'name']
        if matches.empty:
            return None
        return matches.iloc[0]

    def is_subsequence(self, source, target):
        """Return True if ``target`` is a subsequence of ``source``.

        Every item of ``target`` must occur in ``source`` in the same order,
        though not necessarily adjacently. An empty ``target`` is a
        subsequence of anything.

        Args:
            source: The sequence to search in.
            target: The sequence whose items must appear in order.
        """
        # Membership tests on a single iterator consume it, so each item of
        # target is searched for only after the previous match.
        remaining = iter(source)
        return all(item in remaining for item in target)

# 使用示例
def example_usage():
    """Print the results of several sample name lookups and one reverse lookup.

    An ``AreaCodeTool`` is created with its default CSV path, so that data
    file has to be available for the demo to run.
    """
    tool = AreaCodeTool()

    # Sample queries, ranging from complete names to partial ones.
    test_cases = (
        "贵州省",
        "贵阳市",
        "云岩区",
        "贵州省贵阳市",
        "贵州省贵阳市南明区",
        "贵阳",  # fuzzy query
        "贵州贵阳",  # fuzzy query
        "贵州贵阳南明",  # fuzzy query
        "贵州贵阳市南明",  # fuzzy query
    )

    for query in test_cases:
        print(f"\n查询: {query}")
        for name, code in tool.find_code(query):
            print(f"匹配结果: {name} -> {code}")

    # Reverse lookup from a code back to its full name (per the original
    # note, this code is Yingjiang District, Anqing, Anhui).
    code = "340802"
    full_name = tool.get_full_name(code)
    if not full_name:
        return
    print(f"\n代码反查: {code} -> {full_name}")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    example_usage()