scripts-public/obfuscate/obfuscate.py
2025-04-07 20:52:40 +08:00

370 lines
17 KiB
Python

import os
import json
import uuid
import argparse
import sys
from pathlib import Path
# --- Configuration ---
MAPPING_FILENAME = "mapping.json"
# --- Obfuscation Function ---
def obfuscate_directory(target_dir):
    """
    Recursively obfuscate file and directory names inside ``target_dir``.

    Every file is renamed to ``<uuid4><original extension>`` and every
    sub-directory to ``<uuid4>``. The root directory itself keeps its name.
    A JSON mapping ``{obfuscated_relative_path: original_relative_path}`` is
    written to ``MAPPING_FILENAME`` in the root; both sides use forward
    slashes and the key is the path where the item lives once the whole run
    has finished.

    If any rename fails, the renames performed so far are rolled back via
    ``try_restore_from_tracker`` so the tree is never left obfuscated without
    a mapping file.

    Args:
        target_dir: Directory (str or path-like) whose contents are renamed.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    target_dir = Path(target_dir).resolve()  # Get absolute path
    if not target_dir.is_dir():
        print(f"Error: Directory not found: {target_dir}")
        return

    mapping_file_path = target_dir / MAPPING_FILENAME
    if mapping_file_path.exists():
        print(f"Error: Mapping file '{MAPPING_FILENAME}' already exists in this directory.")
        print("Please remove or rename it if you want to re-obfuscate.")
        return

    print(f"Starting obfuscation in: {target_dir}")
    print(f"Mapping file will be saved as: {mapping_file_path}")

    # {original_full_path: full path right after that item's own rename}.
    # Insertion order is rename order, which the rollback helper relies on.
    renamed_paths_tracker = {}

    try:
        # Walk bottom-up: a directory is yielded (and renamed) only after
        # everything below it, so `dirpath` is still valid when we get to it.
        for dirpath_str, _dirnames, filenames in os.walk(target_dir, topdown=False):
            dirpath = Path(dirpath_str)

            # --- Rename Files ---
            for filename in filenames:
                # Skip the mapping file itself!
                if dirpath == target_dir and filename == MAPPING_FILENAME:
                    continue

                _stem, original_ext = os.path.splitext(filename)
                current_full_path = dirpath / filename
                new_full_path = dirpath / (str(uuid.uuid4()) + original_ext)
                try:
                    os.rename(current_full_path, new_full_path)
                except OSError as e:
                    print(f"Error renaming file {current_full_path} to {new_full_path}: {e}")
                    # Without a mapping file the work done so far would be
                    # unrecoverable, so undo it before stopping.
                    try_restore_from_tracker(target_dir, renamed_paths_tracker)
                    return
                renamed_paths_tracker[str(current_full_path)] = str(new_full_path)

            # --- Rename Directories ---
            # Only rename subdirectories, not the root target_dir itself.
            if dirpath != target_dir:
                new_full_path = dirpath.with_name(str(uuid.uuid4()))
                try:
                    os.rename(dirpath, new_full_path)
                except OSError as e:
                    print(f"Error renaming directory {dirpath} to {new_full_path}: {e}")
                    try_restore_from_tracker(target_dir, renamed_paths_tracker)
                    return
                renamed_paths_tracker[str(dirpath)] = str(new_full_path)

        # --- Create the final mapping file ---
        final_mapping = _build_final_mapping(target_dir, renamed_paths_tracker)
        with open(mapping_file_path, 'w', encoding='utf-8') as f:
            json.dump(final_mapping, f, indent=4, ensure_ascii=False)

        print("-" * 20)
        print("Obfuscation complete.")
        print(f"Mapping saved to: {mapping_file_path}")
        print("IMPORTANT: Keep this mapping file safe to restore the original names!")
        print("-" * 20)
    except Exception as e:
        # Top-level boundary: whatever went wrong, try to put the names back.
        print(f"\nAn unexpected error occurred during obfuscation: {e}")
        print("Attempting to clean up...")
        try_restore_from_tracker(target_dir, renamed_paths_tracker)


def _build_final_mapping(target_dir, renamed_paths_tracker):
    """Return ``{final_obfuscated_relative_path: original_relative_path}``.

    The tracker records where an item was right after its own rename, but its
    ancestors are renamed later. The final location is therefore rebuilt by
    replacing every component of the original path with the new name that was
    given to the corresponding original prefix.
    """
    new_names = {
        original: Path(renamed).name
        for original, renamed in renamed_paths_tracker.items()
    }
    final_mapping = {}
    for original_full_str in renamed_paths_tracker:
        original_relative = Path(original_full_str).relative_to(target_dir)
        obfuscated_parts = []
        prefix = target_dir
        for part in original_relative.parts:
            prefix = prefix / part
            obfuscated_parts.append(new_names.get(str(prefix), part))
        # as_posix() gives forward slashes on Windows without touching
        # backslashes that are part of a file name on POSIX systems.
        final_mapping["/".join(obfuscated_parts)] = original_relative.as_posix()
    return final_mapping
# --- Restoration Function ---
def restore_directory(target_dir):
    """
    Restore original file and directory names using the mapping file.

    The tree is walked top-down and every item is renamed *in place* to the
    last component of its original path; a directory is renamed before its
    contents are visited. Files are matched without their extension, so a
    file whose extension changed after obfuscation is still found; it is
    renamed to the original name recorded in the mapping.

    Two mapping key layouts are accepted: keys that are the complete
    obfuscated path (``<uuid-dir>/<uuid>.ext``) and keys whose parent part is
    the original directory path (``original_dir/<uuid>.ext``).

    Items that already carry their original name (for example after an
    earlier, partially failed run) are left alone. Symlinked directories are
    neither renamed nor entered.

    Args:
        target_dir: Directory (str or path-like) containing the obfuscated
            items and the mapping file.

    Returns:
        None. Progress, warnings and errors are reported on stdout.
    """
    target_dir = Path(target_dir).resolve()
    if not target_dir.is_dir():
        print(f"Error: Directory not found: {target_dir}")
        return

    mapping_file_path = target_dir / MAPPING_FILENAME
    if not mapping_file_path.is_file():
        print(f"Error: Mapping file '{MAPPING_FILENAME}' not found in {target_dir}")
        print("Cannot restore without the mapping file.")
        return

    print(f"Starting restoration in: {target_dir}")
    print(f"Using mapping file: {mapping_file_path}")

    # Load the mapping: {obfuscated_relative_path: original_relative_path}
    try:
        with open(mapping_file_path, 'r', encoding='utf-8') as f:
            mapping = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error reading mapping file: Invalid JSON - {e}")
        return
    except (OSError, ValueError) as e:
        print(f"Error reading mapping file: {e}")
        return

    if not isinstance(mapping, dict) or not all(isinstance(v, str) for v in mapping.values()):
        print("Error reading mapping file: expected a JSON object of obfuscated path -> original path.")
        return

    # lookup:        extension-less obfuscated path -> original relative path
    # restored_keys: original relative path -> lookup key (spots finished items)
    lookup = _build_restore_lookup(mapping)
    restored_keys = {orig_rel: key for key, orig_rel in lookup.items()}

    renamed_count = 0
    error_count = 0
    try:
        # Each work item: (directory as it exists on disk right now,
        #                  its obfuscated relative path,
        #                  its original relative path).
        pending = [(target_dir, "", "")]
        while pending:
            current_dir, obf_prefix, orig_prefix = pending.pop()
            try:
                # Snapshot the listing so renames below cannot disturb it.
                entries = sorted(current_dir.iterdir())
            except OSError as e:
                print(f"Error listing {current_dir}: {e}")
                error_count += 1
                continue

            for entry in entries:
                name = entry.name
                # Ignore the mapping file
                if current_dir == target_dir and name == MAPPING_FILENAME:
                    continue

                is_directory = entry.is_dir()
                if is_directory and entry.is_symlink():
                    continue

                stem = name if is_directory else os.path.splitext(name)[0]
                obf_key = _join_key(obf_prefix, stem)
                # Try the full obfuscated path first, then the layout whose
                # parent part is the original directory path.
                original_rel = lookup.get(obf_key) or lookup.get(_join_key(orig_prefix, stem))
                restored_rel = _join_key(orig_prefix, name)

                if not original_rel:
                    known_key = restored_keys.get(restored_rel)
                    if known_key is None:
                        current_relative_path = entry.relative_to(target_dir)
                        if is_directory:
                            print(f"Warning: Could not find mapping for current directory: {current_relative_path} (Lookup key: {obf_key}). Skipping rename.")
                        else:
                            print(f"Warning: Could not find mapping for current file: {current_relative_path} (Lookup key: {obf_key}). Skipping.")
                    if is_directory:
                        # Still descend: the contents may need restoring.
                        child_obf = known_key if known_key is not None else _join_key(obf_prefix, name)
                        pending.append((entry, child_obf, restored_rel))
                    continue

                target_path = current_dir / original_rel.rsplit("/", 1)[-1]
                final_path = entry
                try:
                    # Never overwrite an existing item; a case-only rename of
                    # the same path is allowed through.
                    if os.path.lexists(target_path) and str(entry).lower() != str(target_path).lower():
                        print(f"Warning: Target path {target_path} already exists. Skipping rename of {entry}.")
                        error_count += 1
                    else:
                        os.rename(entry, target_path)
                        renamed_count += 1
                        final_path = target_path
                except (OSError, ValueError) as e:
                    print(f"Error restoring {entry} to {target_path}: {e}")
                    error_count += 1

                if is_directory:
                    pending.append((final_path, _join_key(obf_prefix, name), original_rel))

        print("-" * 20)
        if error_count == 0:
            print(f"Restoration complete. {renamed_count} items restored.")
        else:
            print(f"Restoration finished with {error_count} errors. {renamed_count} items potentially restored.")
            print("Please check the directory structure and logs for issues.")
            print(f"Mapping file '{MAPPING_FILENAME}' was NOT removed due to errors.")
        print("-" * 20)
    except Exception as e:
        print(f"\nAn unexpected error occurred during restoration: {e}")


def _join_key(prefix, name):
    """Join two '/'-separated relative path pieces, tolerating an empty prefix."""
    return f"{prefix}/{name}" if prefix else name


def _build_restore_lookup(mapping):
    """Index the mapping by obfuscated path with the final extension removed."""
    lookup = {}
    for obf_rel, orig_rel in mapping.items():
        parent, _, obf_name = obf_rel.rpartition("/")
        lookup[_join_key(parent, os.path.splitext(obf_name)[0])] = orig_rel
    return lookup
# --- Helper for potential cleanup on obfuscation error ---
def try_restore_from_tracker(target_dir, renamed_paths_tracker):
    """
    Best-effort rollback of the renames recorded in ``renamed_paths_tracker``.

    The tracker maps ``original_full_path -> new_full_path`` in the order the
    renames happened. Entries are undone last-to-first, so a directory gets
    its name back before the items recorded inside it are looked up. Entries
    whose new path no longer exists are skipped; rename errors are printed
    and the rollback carries on.

    Args:
        target_dir: Root directory of the operation (not used here).
        renamed_paths_tracker: Dict of original path string -> new path string.

    Returns:
        None. A summary is printed on stdout.
    """
    print("Attempting to restore names based on actions performed so far...")

    undone = 0
    for recorded_original, recorded_new in reversed(list(renamed_paths_tracker.items())):
        source = Path(recorded_new)
        destination = Path(recorded_original)
        try:
            if not source.exists():
                # Nothing to roll back for this entry.
                continue
            # Ensure original parent exists
            destination.parent.mkdir(parents=True, exist_ok=True)
            os.rename(source, destination)
        except OSError as e:
            print(f" Error during rollback for {source}: {e}")
        else:
            undone += 1

    print(f"Rollback attempt finished. {undone} items potentially restored.")
# --- Main Execution ---
if __name__ == "__main__":
    # Command-line entry point: parse "<mode> <directory>", ask the user to
    # confirm, then run the selected operation on the directory.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description="Obfuscate or restore filenames and directory names using UUIDs.",
    )
    cli.add_argument(
        "mode",
        choices=["obfuscate", "restore"],
        help=(
            "Operation mode:\n"
            " obfuscate: Rename files/folders to UUIDs and create mapping.json.\n"
            " restore: Rename items back using mapping.json."
        ),
    )
    cli.add_argument(
        "directory",
        help="The target directory containing the video files and subfolders.",
    )

    # Running with no arguments at all prints the full help and exits with 1.
    if len(sys.argv) == 1:
        cli.print_help(sys.stderr)
        sys.exit(1)

    args = cli.parse_args()
    target_directory = args.directory
    if not os.path.isdir(target_directory):
        print(f"Error: The specified directory does not exist: {target_directory}")
        sys.exit(1)

    # Pick the warning text, the operation and the cancel message per mode;
    # the confirmation prompt itself is shared.
    if args.mode == "obfuscate":
        notice = (
            f"\nWARNING: This will rename files and folders within '{target_directory}'.",
            "A mapping file ('mapping.json') will be created to allow restoration.",
        )
        operation = obfuscate_directory
        cancelled_message = "Obfuscation cancelled."
    else:  # "restore" is the only other value the parser accepts
        notice = (
            f"\nWARNING: This will attempt to restore original names in '{target_directory}'",
            f"using the '{MAPPING_FILENAME}' file found within it.",
            "This assumes the current names are the obfuscated ones.",
        )
        operation = restore_directory
        cancelled_message = "Restoration cancelled."

    for line in notice:
        print(line)

    # Only the exact answer "yes" (any letter case) proceeds.
    answer = input("Are you sure you want to proceed? (yes/no): ").lower()
    if answer == 'yes':
        operation(target_directory)
    else:
        print(cancelled_message)