# 目的:把项目源码汇总成一个 Markdown 文件,方便交给网页版 AI 分析;
# 可配合下面的 tree 命令,把文件目录结构一并提供给 AI:
#   tree -L 10 -I 'target|node_modules|.git|logs|*.class' > project-tree.txt
import os
# ========= Configuration =========
# ========= Ignored directories =========
# Directory names that are pruned while walking the tree (compared against
# the bare directory name, so they apply at any depth).
IGNORE_DIRS = {
    # VCS / IDE / build-tool metadata
    ".git",
    ".idea",
    ".mvn",
    # Build output
    "target",  # Maven build output (the most important one)
    "dist",
    "build",
    # Dependencies and caches
    "node_modules",  # front-end dependencies, if present
    "__pycache__",  # avoid scanning the Python bytecode cache by accident
    # Runtime artifacts
    "logs",  # log directory (common)
}
# ========= Ignored files =========
# Exact file names that are never exported: the output*.md names
# (presumably files generated by this tool and its variants) plus the
# .DS_Store folder-metadata file.
IGNORE_FILES = {
    ".DS_Store",
    "output.md",
    "output_black.md",
    "output_white.md",
}
# ========= Ignored suffixes =========
# File-name suffixes of content that is not worth exporting as text.
IGNORE_SUFFIX = {
    # Binaries / compiled artifacts
    ".class",
    ".jar",
    ".war",
    ".ear",
    # Archives
    ".zip",
    ".tar",
    ".gz",
    ".7z",
    # Images / media
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".bmp",
    ".svg",
    # Executables / shared libraries
    ".exe",
    ".dll",
    ".so",
    ".dylib",
    # Logs
    ".log",
    # IDE / system files
    ".iml",
    # Other files with no useful content
    ".lock",
}
# ========= Allowed suffixes =========
# Whitelist of file-name suffixes: only files ending in one of these are
# exported.
ALLOW_SUFFIX = {
    # Source code and scripts
    ".java",
    ".sql",
    ".sh",
    # Configuration
    ".xml",
    ".yml",
    ".yaml",
    ".properties",
    # Documentation and templates
    ".md",
    ".ftl",  # FreeMarker templates
}
# Default output file name.
# NOTE(review): not referenced by the code visible in this file; the script
# entry point passes its own output name. Confirm whether it is still needed.
OUTPUT_FILE = "output_white.md"

# Maps a lower-case file extension to the language tag written after the
# opening Markdown code fence. Extensions without an entry get no tag.
LANG_MAP = {
    ".py": "python",
    ".java": "java",
    ".js": "javascript",
    ".ts": "typescript",
    ".json": "json",
    ".xml": "xml",
    ".html": "html",
    ".css": "css",
    ".sh": "bash",
    ".yml": "yaml",
    ".yaml": "yaml",
    ".md": "markdown",
    ".go": "go",
    ".rs": "rust",
    ".ftl": "freemarker",  # FreeMarker templates
    # Both suffixes are whitelisted in ALLOW_SUFFIX; without these entries
    # their code blocks would be emitted without a language tag.
    ".sql": "sql",
    ".properties": "properties",
}
# ========= 工具函数 =========
def should_ignore_file(filename):
    """Decide whether a file must be left out of the export.

    Args:
        filename: Bare file name (no directory component).

    Returns:
        True if the file should be skipped, False if it should be exported.
    """
    if filename in IGNORE_FILES:
        return True

    # Suffixes are compared case-insensitively so that e.g. "README.MD" or
    # "Schema.SQL" are not silently dropped; detect_language() lowercases
    # the extension in the same way.
    lowered = filename.lower()

    # Explicitly ignored suffixes take precedence over the whitelist.
    # str.endswith accepts a tuple and returns False for an empty one.
    if lowered.endswith(tuple(IGNORE_SUFFIX)):
        return True

    # Whitelist (the key rule): keep only files with an allowed suffix.
    return not lowered.endswith(tuple(ALLOW_SUFFIX))
def detect_language(filename):
    """Return the Markdown fence language tag for *filename*.

    The lookup key is the lower-cased extension as split off by
    os.path.splitext. An empty string is returned when LANG_MAP has no
    entry for that extension.
    """
    extension = os.path.splitext(filename)[1].lower()
    if extension in LANG_MAP:
        return LANG_MAP[extension]
    return ""
def read_file_safe(path):
    """Read a text file, trying UTF-8 first and GBK as a fallback.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded file content, or None if the file cannot be opened/read
        or cannot be decoded with either encoding.
    """
    # "utf-8-sig" reads plain UTF-8 as well, but additionally drops a leading
    # byte-order mark so it does not end up as U+FEFF in the exported text.
    for encoding in ("utf-8-sig", "gbk"):
        try:
            with open(path, "r", encoding=encoding) as f:
                return f.read()
        except UnicodeDecodeError:
            # Wrong encoding guess: try the next candidate.
            continue
        except (OSError, ValueError):
            # Missing/unreadable file or unusable path: another encoding
            # cannot help, so give up on this file (best effort).
            return None
    return None
# ========= 核心逻辑 =========
def scan_to_markdown(input_dir, output_file):
    """Export the whitelisted text files under *input_dir* into one Markdown file.

    For every exported file a "## <relative path>" heading is written,
    followed by the file content inside a fenced code block tagged with the
    language returned by detect_language().

    Args:
        input_dir: Root directory to scan (made absolute before walking).
        output_file: Path of the Markdown file to create or overwrite.

    Files rejected by should_ignore_file(), and files read_file_safe()
    cannot read, are skipped. Directories named in IGNORE_DIRS are not
    entered.
    """
    import re  # local import: only needed to size the code fence

    input_dir = os.path.abspath(input_dir)
    # Normalised absolute path of the output, used to avoid exporting the
    # file that is being written when it lives inside input_dir.
    output_abs = os.path.normcase(os.path.abspath(output_file))

    with open(output_file, "w", encoding="utf-8") as out:
        for root, dirs, files in os.walk(input_dir):
            # Prune ignored directories in place so os.walk does not descend
            # into them; sorting makes the traversal order deterministic.
            dirs[:] = sorted(d for d in dirs if d not in IGNORE_DIRS)

            for file in sorted(files):
                if should_ignore_file(file):
                    continue

                full_path = os.path.join(root, file)
                if os.path.normcase(os.path.abspath(full_path)) == output_abs:
                    continue

                content = read_file_safe(full_path)
                if content is None:
                    continue

                # Always use forward slashes in headings, also on Windows.
                rel_path = os.path.relpath(full_path, input_dir).replace("\\", "/")
                lang = detect_language(file)
                body = content.rstrip()

                # A fence must be longer than any backtick run inside the
                # content (e.g. Markdown files that contain ``` themselves),
                # otherwise the block would be closed prematurely.
                longest_run = max(
                    (len(run) for run in re.findall(r"`+", body)), default=0
                )
                fence = "`" * max(3, longest_run + 1)

                out.write(f"## {rel_path}\n\n")
                out.write(f"{fence}{lang}\n")
                out.write(body + "\n")
                out.write(f"{fence}\n\n")
if __name__ == "__main__":
    import sys  # local import: only the script entry point needs argv

    # Defaults. The Windows path is a raw string so its backslashes are not
    # parsed as (invalid) escape sequences; the resulting value is unchanged.
    input_dir = r"C:\Dev\JavaSeries\IdeaProjects\SpringBootFromGithub\oa-parent\generator"
    output_file = "output.md"

    # Optional command-line overrides:
    #   argv[1] -> directory to scan, argv[2] -> output Markdown file.
    # Without arguments the defaults above are used.
    if len(sys.argv) >= 2:
        input_dir = sys.argv[1]
    if len(sys.argv) >= 3:
        output_file = sys.argv[2]

    scan_to_markdown(input_dir, output_file)
    print(f"完成:{input_dir} -> {output_file}")