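# Purpose: bundle the project's source files into a single Markdown file so it
# can be pasted into a web-based AI assistant for analysis. Use it together
# with the directory tree produced by the `tree` command below, so the AI can
# also see the file/directory structure:
#
#   tree -L 10 -I 'target|node_modules|.git|logs|*.class' > project-tree.txt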

import os

# ========= 配置 =========

# ========= Ignored directories =========
# Directory names the scan never descends into.
IGNORE_DIRS = {
    # Dot-directories (VCS / IDE / Maven tooling)
    ".git", ".idea", ".mvn",

    # Build output and dependencies
    "target",        # Maven build output (the most important one to skip)
    "node_modules",  # front-end dependencies, if present
    "dist", "build",

    # Caches and logs
    "__pycache__",   # avoid scanning Python bytecode caches by accident
    "logs",          # conventional log directory
}

# ========= Ignored files =========
# Exact file names that are always skipped, whatever their suffix.
# The output*.md entries presumably are files generated by this script (or a
# sibling variant of it) — keeps previous results out of the next run.
IGNORE_FILES = {
    "output.md", "output_black.md", "output_white.md",
    ".DS_Store",
}

# ========= Ignored suffixes =========
# NOTE(review): nothing in the visible code reads this set; file filtering is
# done through ALLOW_SUFFIX in should_ignore_file(). Confirm whether it is
# kept for a deny-list variant of the script.
IGNORE_SUFFIX = {
    # Binaries / compiled artifacts
    ".class",
    ".jar",
    ".war",
    ".ear",

    # Archives
    ".zip",
    ".tar",
    ".gz",
    ".7z",

    # Images / media
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".bmp",
    ".svg",

    # Executables / shared libraries
    ".exe",
    ".dll",
    ".so",
    ".dylib",

    # Logs
    ".log",

    # IDE / system files
    ".iml",

    # Other files with no useful content
    ".lock",
}

# ========= Allowed suffixes =========
# Allow-list: only files whose name ends with one of these suffixes are
# written to the Markdown output.
ALLOW_SUFFIX = {
    ".java", ".xml",
    ".yml", ".yaml", ".properties",
    ".md", ".sql", ".sh",
    ".ftl",  # FreeMarker templates
}

# Presumably the intended name of the generated Markdown file (it is also
# listed in IGNORE_FILES).
# NOTE(review): the __main__ block in this file passes its own "output.md"
# rather than this constant — confirm whether it is still needed.
OUTPUT_FILE = "output_white.md"

# Maps a lower-case file suffix to the language tag written after the opening
# Markdown code fence. Suffixes without an entry get an untagged fence.
LANG_MAP = {
    ".py": "python",
    ".java": "java",
    ".js": "javascript",
    ".ts": "typescript",
    ".json": "json",
    ".xml": "xml",
    ".html": "html",
    ".css": "css",
    ".sh": "bash",
    ".yml": "yaml",
    ".yaml": "yaml",
    ".md": "markdown",
    ".go": "go",
    ".rs": "rust",
    ".ftl": "freemarker",  # FreeMarker templates
    # Suffixes that ALLOW_SUFFIX lets through but that had no tag before,
    # so their code blocks were emitted without a language.
    ".sql": "sql",
    ".properties": "properties",
}


# ========= 工具函数 =========

def should_ignore_file(filename):
    """Return True when *filename* must be left out of the Markdown output.

    A file is skipped when its exact name is listed in IGNORE_FILES, or when
    its name does not end with one of the suffixes in ALLOW_SUFFIX. The
    suffix comparison ignores case, so ``Foo.JAVA`` or ``README.MD`` are kept
    like their lower-case spellings; detect_language() lower-cases the
    extension in the same way.

    Args:
        filename: File name to test; the IGNORE_FILES check expects the bare
            name without a directory part.

    Returns:
        True to skip the file, False to include it.
    """
    if filename in IGNORE_FILES:
        return True

    # Allow-list filter. str.endswith accepts a tuple of candidates, which
    # replaces the manual any(...) loop; an empty allow-list rejects everything.
    return not filename.lower().endswith(tuple(ALLOW_SUFFIX))


def detect_language(filename):
    """Return the Markdown fence language for *filename*, or "" if unmapped.

    The extension is taken with os.path.splitext, lower-cased, and looked up
    in LANG_MAP.
    """
    extension = os.path.splitext(filename)[1].lower()
    if extension in LANG_MAP:
        return LANG_MAP[extension]
    return ""


def read_file_safe(path):
    """Read a text file, trying UTF-8 first and GBK as a fallback.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded file content, or None when the file cannot be opened or
        cannot be decoded with either encoding.
    """
    # "utf-8-sig" decodes plain UTF-8 as well and drops a leading BOM, so the
    # marker does not leak into the content as a stray "\ufeff" character.
    for encoding in ("utf-8-sig", "gbk"):
        try:
            with open(path, "r", encoding=encoding) as f:
                return f.read()
        except UnicodeError:
            # Wrong encoding guess: move on to the next candidate.
            continue
        except (OSError, ValueError):
            # Missing/unreadable file or unusable path: a different encoding
            # cannot help, so give up right away (best-effort, no raise).
            return None
    return None


# ========= 核心逻辑 =========

def scan_to_markdown(input_dir, output_file):
    """Concatenate the allowed text files under *input_dir* into one Markdown file.

    The tree is walked top-down. Directories named in IGNORE_DIRS are pruned;
    files rejected by should_ignore_file() or for which read_file_safe()
    returns None are skipped. Every remaining file is written as a
    ``## <relative path>`` heading followed by a fenced code block tagged with
    the language from detect_language().

    Directories and files are visited in sorted order so that repeated runs
    over the same tree produce the same output.

    Args:
        input_dir: Root directory to scan; resolved to an absolute path.
        output_file: Path of the Markdown file to write (opened with "w", so
            an existing file is overwritten).
    """
    input_dir = os.path.abspath(input_dir)
    output_path = os.path.normcase(os.path.abspath(output_file))

    with open(output_file, "w", encoding="utf-8") as out:
        for root, dirs, files in os.walk(input_dir):

            # Prune in place so os.walk does not descend into ignored
            # directories; sorting fixes the traversal order.
            dirs[:] = sorted(d for d in dirs if d not in IGNORE_DIRS)

            for file_name in sorted(files):
                if should_ignore_file(file_name):
                    continue

                full_path = os.path.join(root, file_name)

                # The output file already exists (it was opened above); never
                # embed its half-written content into itself.
                if os.path.normcase(full_path) == output_path:
                    continue

                rel_path = os.path.relpath(full_path, input_dir).replace("\\", "/")

                content = read_file_safe(full_path)
                if content is None:
                    continue

                lang = detect_language(file_name)

                # Pick the shortest backtick fence (at least three) that does
                # not occur in the content, so a file that itself contains
                # ``` fences (e.g. Markdown) cannot close the block early.
                fence = "```"
                while fence in content:
                    fence += "`"

                out.write(f"## {rel_path}\n\n")
                out.write(f"{fence}{lang}\n")
                out.write(content.rstrip() + "\n")
                out.write(f"{fence}\n\n")


if __name__ == "__main__":
    # Command-line entry point.
    # Usage: python <this script> [input_dir] [output_file]
    import sys  # local import: only the entry point needs it

    # Defaults used when no arguments are given. The Windows path is a raw
    # string so that its backslashes are not parsed as (invalid) escape
    # sequences, which newer Python versions warn about.
    input_dir = r"C:\Dev\JavaSeries\IdeaProjects\SpringBootFromGithub\oa-parent\generator"
    output_file = "output.md"

    # Optional positional overrides.
    if len(sys.argv) >= 2:
        input_dir = sys.argv[1]

    if len(sys.argv) >= 3:
        output_file = sys.argv[2]

    scan_to_markdown(input_dir, output_file)

    print(f"完成:{input_dir} -> {output_file}")