Create obfuscate

This commit is contained in:
FlintyLemming 2025-04-07 20:52:40 +08:00
parent 9464c2e933
commit be2fd9d32f
2 changed files with 439 additions and 0 deletions

369
obfuscate/obfuscate.py Normal file
View File

@ -0,0 +1,369 @@
import os
import json
import uuid
import argparse
import sys
from pathlib import Path
# --- Configuration ---
MAPPING_FILENAME = "mapping.json"
# --- Obfuscation Function ---
def obfuscate_directory(target_dir):
    """
    Recursively rename every file and sub-directory under target_dir to a UUID.

    Files keep their original extension; directories get a bare UUID. The
    root directory itself is never renamed. After all renames succeed, a JSON
    mapping file named MAPPING_FILENAME is written in the root of target_dir
    with the shape ``{obfuscated_relative_path: original_relative_path}``,
    using forward slashes. Every key is the path at which the item actually
    lives once the whole run has finished.

    If any rename fails, or the mapping cannot be written, every rename done
    so far is rolled back via try_restore_from_tracker so the tree is not left
    half-obfuscated without a mapping.

    Args:
        target_dir: Directory (str or path-like) whose contents are obfuscated.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    target_dir = Path(target_dir).resolve()  # Work with an absolute path
    if not target_dir.is_dir():
        print(f"Error: Directory not found: {target_dir}")
        return
    mapping_file_path = target_dir / MAPPING_FILENAME
    if mapping_file_path.exists():
        # Refuse to overwrite an existing mapping: it may be the only way back.
        print(f"Error: Mapping file '{MAPPING_FILENAME}' already exists in this directory.")
        print("Please remove or rename it if you want to re-obfuscate.")
        return
    print(f"Starting obfuscation in: {target_dir}")
    print(f"Mapping file will be saved as: {mapping_file_path}")

    # Rollback log, in rename order: {original_full_path: path right after its own rename}.
    # Paths are recorded "as of rename time" because that is what the rollback
    # helper needs when it undoes the renames in reverse order.
    renamed_paths_tracker = {}
    # {original_full_path (Path): new bare name}; used to compute final paths.
    new_names = {}

    def _rename(original_full_path, new_name):
        # The walk is bottom-up, so when an item is renamed all of its
        # ancestors still carry their original names: the item is still
        # located at its original path.
        new_full_path = original_full_path.with_name(new_name)
        os.rename(original_full_path, new_full_path)
        renamed_paths_tracker[str(original_full_path)] = str(new_full_path)
        new_names[original_full_path] = new_name

    try:
        # topdown=False: the contents of a folder are renamed before the folder itself.
        for dirpath_str, _dirnames, filenames in os.walk(target_dir, topdown=False):
            dirpath = Path(dirpath_str)

            # --- Rename files ---
            for filename in filenames:
                # Never touch a mapping file sitting in the root.
                if dirpath == target_dir and filename == MAPPING_FILENAME:
                    continue
                _stem, original_ext = os.path.splitext(filename)
                source = dirpath / filename
                new_filename = str(uuid.uuid4()) + original_ext
                try:
                    _rename(source, new_filename)
                except OSError as e:
                    print(f"Error renaming file {source} to {source.with_name(new_filename)}: {e}")
                    # Do not leave a half-renamed tree behind without a mapping.
                    try_restore_from_tracker(target_dir, renamed_paths_tracker)
                    return

            # --- Rename the directory itself (never the root) ---
            if dirpath != target_dir:
                new_dirname = str(uuid.uuid4())
                try:
                    _rename(dirpath, new_dirname)
                except OSError as e:
                    print(f"Error renaming directory {dirpath} to {dirpath.with_name(new_dirname)}: {e}")
                    try_restore_from_tracker(target_dir, renamed_paths_tracker)
                    return

        # --- Build the final mapping ---
        # Translate every component of the original path to its new name so
        # the key is the path where the item really lives now.
        final_mapping = {}
        for original_full in new_names:
            original_relative = original_full.relative_to(target_dir)
            obfuscated_parts = []
            prefix = target_dir
            for part in original_relative.parts:
                prefix = prefix / part
                obfuscated_parts.append(new_names.get(prefix, part))
            # Join the components with '/' instead of replacing backslashes in
            # the rendered string: a backslash is a legal filename character
            # on POSIX and must not be rewritten.
            final_mapping["/".join(obfuscated_parts)] = "/".join(original_relative.parts)

        # --- Save the mapping ---
        with open(mapping_file_path, 'w', encoding='utf-8') as f:
            json.dump(final_mapping, f, indent=4, ensure_ascii=False)
        print("-" * 20)
        print("Obfuscation complete.")
        print(f"Mapping saved to: {mapping_file_path}")
        print("IMPORTANT: Keep this mapping file safe to restore the original names!")
        print("-" * 20)
    except Exception as e:
        print(f"\nAn unexpected error occurred during obfuscation: {e}")
        print("Attempting to clean up...")
        # Best-effort rollback of everything renamed so far.
        try_restore_from_tracker(target_dir, renamed_paths_tracker)
# --- Restoration Function ---
def restore_directory(target_dir):
    """
    Restore original file and directory names from the mapping file.

    Reads MAPPING_FILENAME from the root of target_dir. The mapping has the
    shape ``{obfuscated_relative_path: original_relative_path}``. Entries are
    processed from the shallowest original path to the deepest, so by the time
    an item is handled its parent directory already has its original name and
    the item can be renamed in place.

    Only the last component of each key is used to find an item, so the
    restore works whether the key's parent components are obfuscated or
    original names. If the exact obfuscated name is missing, a single entry
    with the same stem but a different extension is accepted; it is still
    renamed to the original path recorded in the mapping.

    Items not listed in the mapping are left untouched and reported. The
    mapping file is never deleted.

    Args:
        target_dir: Directory (str or path-like) that was previously obfuscated.

    Returns:
        None. Progress, warnings and errors are reported on stdout.
    """
    target_dir = Path(target_dir).resolve()
    if not target_dir.is_dir():
        print(f"Error: Directory not found: {target_dir}")
        return
    mapping_file_path = target_dir / MAPPING_FILENAME
    if not mapping_file_path.is_file():
        print(f"Error: Mapping file '{MAPPING_FILENAME}' not found in {target_dir}")
        print("Cannot restore without the mapping file.")
        return
    print(f"Starting restoration in: {target_dir}")
    print(f"Using mapping file: {mapping_file_path}")

    # Load the mapping: {obfuscated_relative_path: original_relative_path}
    try:
        with open(mapping_file_path, 'r', encoding='utf-8') as f:
            mapping = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error reading mapping file: Invalid JSON - {e}")
        return
    except Exception as e:
        print(f"Error reading mapping file: {e}")
        return
    if not isinstance(mapping, dict):
        print("Error reading mapping file: expected a JSON object of obfuscated -> original paths.")
        return

    def _is_safe_relative(rel):
        # Reject empty, absolute/anchored and parent-escaping paths so a
        # damaged mapping cannot rename things outside target_dir.
        return bool(rel.parts) and rel.anchor == "" and ".." not in rel.parts

    def _find_current(parent_dir, obf_name):
        # Exact obfuscated name first. lexists() also sees broken symlinks.
        exact = parent_dir / obf_name
        if os.path.lexists(exact):
            return exact
        # Fall back to "same stem, different extension".
        obf_stem = os.path.splitext(obf_name)[0]
        try:
            matches = [
                entry for entry in parent_dir.iterdir()
                if entry != mapping_file_path
                and os.path.splitext(entry.name)[0] == obf_stem
            ]
        except OSError:
            return None
        # Refuse to guess when more than one entry shares the stem.
        return matches[0] if len(matches) == 1 else None

    renamed_count = 0
    error_count = 0

    # --- Validate entries and order them shallowest-original-path first ---
    plan = []  # list of (obfuscated_relative Path, original_relative Path)
    for obf_rel, orig_rel in mapping.items():
        obf_path = Path(obf_rel)
        orig_path = Path(orig_rel) if isinstance(orig_rel, str) else None
        if orig_path is None or not _is_safe_relative(obf_path) or not _is_safe_relative(orig_path):
            print(f"Warning: Ignoring invalid mapping entry: {obf_rel!r} -> {orig_rel!r}")
            error_count += 1
            continue
        plan.append((obf_path, orig_path))
    plan.sort(key=lambda pair: len(pair[1].parts))

    # --- Execute renames ---
    for obf_path, orig_path in plan:
        target_path = target_dir / orig_path
        # Normally the parent was restored already, so the item sits next to
        # its target. The literal key location is a fallback for the case
        # where the parent directory could not be restored.
        search_dirs = [target_path.parent]
        literal_parent = (target_dir / obf_path).parent
        if literal_parent != target_path.parent:
            search_dirs.append(literal_parent)
        current_path = None
        for search_dir in search_dirs:
            current_path = _find_current(search_dir, obf_path.name)
            if current_path is not None:
                break

        if current_path is None:
            if os.path.lexists(target_path):
                # Typical when restore is run a second time.
                print(f"Note: {target_path} already has its original name. Skipping.")
            else:
                print(f"Warning: Could not find obfuscated item '{obf_path.name}' for '{orig_path}' (missing or ambiguous). Skipping.")
                error_count += 1
            continue

        # A target that already exists is only acceptable for a case-only
        # rename on a case-insensitive filesystem.
        if os.path.lexists(target_path) and str(current_path).lower() != str(target_path).lower():
            print(f"Warning: Target path {target_path} already exists. Skipping rename of {current_path}.")
            error_count += 1
            continue

        try:
            # Only creates something when the item was found via the fallback location.
            target_path.parent.mkdir(parents=True, exist_ok=True)
            os.rename(current_path, target_path)
            renamed_count += 1
        except OSError as e:
            print(f"Error restoring {current_path} to {target_path}: {e}")
            error_count += 1

    # --- Report anything on disk that the mapping does not account for ---
    known_originals = {orig_path.parts for _obf, orig_path in plan}
    mapping_rel = Path(MAPPING_FILENAME)
    for dirpath_str, dirnames, filenames in os.walk(target_dir):
        rel_dir = Path(dirpath_str).relative_to(target_dir)
        for name in dirnames + filenames:
            rel = rel_dir / name
            if rel != mapping_rel and rel.parts not in known_originals:
                print(f"Warning: {rel} does not match any original path in the mapping. Left unchanged.")

    print("-" * 20)
    if error_count == 0:
        print(f"Restoration complete. {renamed_count} items restored.")
    else:
        print(f"Restoration finished with {error_count} errors. {renamed_count} items potentially restored.")
        print("Please check the directory structure and logs for issues.")
        print(f"Mapping file '{MAPPING_FILENAME}' was NOT removed due to errors.")
    print("-" * 20)
# --- Helper for potential cleanup on obfuscation error ---
def try_restore_from_tracker(target_dir, renamed_paths_tracker):
    """
    Best-effort rollback of the renames recorded in renamed_paths_tracker.

    The tracker maps ``original_full_path -> new_full_path`` in the order the
    renames were performed. Entries are undone in reverse order. An entry
    whose new path no longer exists is skipped silently; an OSError on one
    entry is reported and the rollback continues with the next one.

    Args:
        target_dir: Root of the operation (not used by the rollback itself).
        renamed_paths_tracker: Dict of performed renames, as path strings.

    Returns:
        None. A summary is printed on stdout.
    """
    print("Attempting to restore names based on actions performed so far...")
    undone = 0
    # Walk the log backwards: the most recent rename is undone first.
    for source_str, renamed_str in reversed(list(renamed_paths_tracker.items())):
        source = Path(source_str)
        renamed = Path(renamed_str)
        try:
            if not renamed.exists():
                # Nothing to roll back for this entry.
                continue
            # Make sure the original parent is there before moving back.
            source.parent.mkdir(parents=True, exist_ok=True)
            os.rename(renamed, source)
        except OSError as e:
            print(f" Error during rollback for {renamed}: {e}")
        else:
            undone += 1
    print(f"Rollback attempt finished. {undone} items potentially restored.")
# --- Main Execution ---
if __name__ == "__main__":
    # Command-line entry point: parse <mode> <directory>, ask for an explicit
    # "yes", then run the selected operation.
    cli = argparse.ArgumentParser(
        description="Obfuscate or restore filenames and directory names using UUIDs.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    cli.add_argument(
        "mode",
        choices=["obfuscate", "restore"],
        help=(
            "Operation mode:\n"
            " obfuscate: Rename files/folders to UUIDs and create mapping.json.\n"
            " restore: Rename items back using mapping.json."
        ),
    )
    cli.add_argument(
        "directory",
        help="The target directory containing the video files and subfolders.",
    )

    # Called with no arguments at all: show the full help instead of a terse usage error.
    if len(sys.argv) == 1:
        cli.print_help(sys.stderr)
        sys.exit(1)

    options = cli.parse_args()
    target_directory = options.directory
    if not os.path.isdir(target_directory):
        print(f"Error: The specified directory does not exist: {target_directory}")
        sys.exit(1)

    # Per mode: (warning lines shown before the prompt, operation, cancel message).
    operations = {
        "obfuscate": (
            (
                f"\nWARNING: This will rename files and folders within '{target_directory}'.",
                "A mapping file ('mapping.json') will be created to allow restoration.",
            ),
            obfuscate_directory,
            "Obfuscation cancelled.",
        ),
        "restore": (
            (
                f"\nWARNING: This will attempt to restore original names in '{target_directory}'",
                f"using the '{MAPPING_FILENAME}' file found within it.",
                "This assumes the current names are the obfuscated ones.",
            ),
            restore_directory,
            "Restoration cancelled.",
        ),
    }
    selected = operations.get(options.mode)
    if selected is not None:
        notices, operation, cancel_message = selected
        for notice in notices:
            print(notice)
        # Only the literal answer "yes" (any letter case) proceeds.
        answer = input("Are you sure you want to proceed? (yes/no): ").lower()
        if answer == 'yes':
            operation(target_directory)
        else:
            print(cancel_message)

70
obfuscate/readme.md Normal file
View File

@ -0,0 +1,70 @@
# 文件名/目录名混淆与恢复工具 (Filename/Directory Obfuscation & Restoration Tool)
## 概述
这是一个 Python 脚本,用于对指定目录下的文件名和子目录名进行“混淆”处理,将它们替换为随机生成的 UUID (Universally Unique Identifier),同时保留原始的文件扩展名。脚本会生成一个 `mapping.json` 文件,记录原始名称与混淆后名称的对应关系。之后,可以使用这个 `mapping.json` 文件将文件名和目录名恢复到原始状态。
这对于需要隐藏原始文件结构或名称,但又希望能够恢复的场景非常有用(例如,临时分享文件,或对大量媒体文件进行初步匿名化处理)。
## 功能特性
* **混淆 (Obfuscate)**:
* 递归遍历指定目录下的所有文件和子目录。
* 将每个文件(保留扩展名)和目录重命名为唯一的 UUID V4 字符串。
* 在目标目录的根目录下创建一个 `mapping.json` 文件,存储 `{ "混淆后的相对路径": "原始相对路径" }` 的映射关系。
* 优先处理深层路径,确保目录在其内容被重命名后才被重命名。
* 执行前会有确认提示,防止误操作。
* 如果混淆过程中发生错误,会尝试根据已执行的操作进行回滚。
* **恢复 (Restore)**:
* 读取目标目录根目录下的 `mapping.json` 文件。
* 根据映射关系,将混淆后的 UUID 名称恢复为原始的文件名和目录名。
* 能够处理嵌套目录的恢复。
* 执行前会有确认提示。
* 处理过程中会检查路径是否存在,并报告错误。
## 依赖
* Python 3.x
* 标准库: `os`, `json`, `uuid`, `argparse`, `sys`, `pathlib`
(无需安装额外的第三方库)
## 使用方法
通过命令行运行脚本。
```bash
python <脚本文件名>.py <模式> <目标目录路径>
```
**参数说明:**
* `<脚本文件名>.py`: 你保存此脚本的文件名 (例如本仓库中的 `obfuscate.py`)。
* `<模式>`:
* `obfuscate`: 执行混淆操作。
* `restore`: 执行恢复操作。
* `<目标目录路径>`: 需要进行操作的文件夹的路径。
**示例:**
1. **混淆目录 `/data/my_videos` 下的所有文件和子目录:**
```bash
python obfuscate.py obfuscate /data/my_videos
```
执行前会要求确认。成功后,`/data/my_videos` 下的文件和目录名会被替换为 UUID,并在 `/data/my_videos` 目录下生成 `mapping.json`。
2. **恢复目录 `/data/my_videos` 下的原始名称:**
```bash
python obfuscate.py restore /data/my_videos
```
执行前会要求确认。脚本会查找 `/data/my_videos/mapping.json` 文件,并根据其内容恢复原始名称。
## 重要提示与警告
1. **备份数据**: **强烈建议在运行此脚本(尤其是 `obfuscate` 模式)之前备份您的重要数据!** 脚本会直接重命名文件和目录,虽然有恢复机制,但意外情况(如 `mapping.json` 丢失或损坏)可能导致数据难以恢复。
2. **`mapping.json` 文件**: 这个文件是 **恢复原始名称的关键**。请务必妥善保管。如果丢失或损坏此文件,将 **无法** 通过此脚本恢复原始名称。
3. **`mapping.json` 的位置**: 混淆操作会在目标目录的 **根目录** 下创建 `mapping.json`。恢复操作也需要在此目录下找到该文件才能工作。
4. **重复混淆**: 如果目标目录下已经存在 `mapping.json` 文件,再次执行 `obfuscate` 操作将会失败,并提示用户移除或重命名该文件。这是为了防止意外覆盖重要的映射信息。
5. **恢复前提**: `restore` 操作假定当前目录中的文件名和结构是上次 `obfuscate` 操作产生的结果,并且与 `mapping.json` 文件中的记录相符。如果手动修改了混淆后的名称或结构,恢复可能会失败或产生不正确的结果。
6. **错误处理**: 脚本包含基本的错误处理(如文件/目录不存在、权限问题),但可能无法覆盖所有边缘情况。如果在操作过程中遇到错误,请检查控制台输出信息。恢复操作在出错时不会删除 `mapping.json` 文件。