433 lines
18 KiB
Python
433 lines
18 KiB
Python
import os
|
|
import re
|
|
import zipfile
|
|
import tempfile
|
|
import shutil
|
|
import argparse
|
|
from tqdm import tqdm
|
|
from collections import defaultdict
|
|
import unicodedata
|
|
|
|
def normalize_filename(filename):
    """Normalize a filename to handle encoding issues and special characters.

    Typographic punctuation and a few characters that commonly appear when a
    name has been through a lossy re-encoding are mapped to plain ASCII so
    that later pattern matching sees a predictable character set.

    Args:
        filename: The file name (or any other string) to clean up.

    Returns:
        A copy of ``filename`` with every mapped character replaced. All
        other characters are returned unchanged.
    """
    # The keys are written as escapes on purpose: stored as literal
    # characters they can all degrade to '?' when the source file is saved in
    # a non-Unicode encoding, which collapses the table to one key and leaves
    # only the last entry in effect.
    replacements = {
        '?': "'",       # question mark left behind by a badly re-encoded apostrophe
        '\u2018': "'",  # left single quotation mark
        '\u2019': "'",  # right single quotation mark / typographic apostrophe
        '\u201c': '"',  # left double quotation mark
        '\u201d': '"',  # right double quotation mark
        '\u2013': '-',  # en dash
        '\u2014': '-',  # em dash
        '\u00a0': ' ',  # no-break space
    }

    # One pass over the string instead of one .replace() call per entry.
    return filename.translate(str.maketrans(replacements))
|
|
|
|
def parse_manga_filename(filename):
    """Extract volume, chapter and title information from a manga filename.

    The base name is expected to look like
    ``<manga name> v<NN> c<NNN> <title> [<group>].cbz``. The trailing
    ``[<group>]`` is optional and the extension is matched in any letter
    case. The name is passed through ``normalize_filename`` before matching.

    Args:
        filename: Path (or bare name) of a chapter file.

    Returns:
        A dict with the keys ``manga_name``, ``volume`` (int), ``chapter``
        (float used for sorting), ``chapter_str`` (the chapter text exactly
        as it appears in the name), ``title``, ``group`` and ``filename``
        (the argument, unchanged), or ``None`` when the name does not match.
        A warning is printed for an unparseable name only if the file exists.
    """
    base_filename = os.path.basename(filename)

    # Match against the normalized name so typographic punctuation in the
    # title does not get in the way; the original path is what gets returned.
    normalized_filename = normalize_filename(base_filename)

    # The extension is spelled out per letter rather than using
    # re.IGNORECASE so that the "v" and "c" markers stay case-sensitive.
    pattern = r'(.*?)\s+v(\d+)\s+c(\d+[.\d-]*)\s+(.*)\.[cC][bB][zZ]$'
    match = re.match(pattern, normalized_filename)

    if not match:
        if os.path.exists(filename):
            # For debugging: print the filename that couldn't be parsed
            print(f"WARNING: Could not parse filename: {base_filename}")
        return None

    manga_name = match.group(1).strip()
    volume = int(match.group(2))
    chapter_str = match.group(3)
    rest = match.group(4)

    # The last bracketed segment, if any, is the group; everything before it
    # is the title. Slicing at the match start (rather than at a fixed offset
    # before the bracket) keeps the title intact when no space precedes "["
    # and yields an empty title when the remainder is only "[group]".
    group_match = re.search(r'\[([^\[\]]*)\]\s*$', rest)
    if group_match:
        group = group_match.group(1)
        title = rest[:group_match.start()].strip()
    else:
        group = ""
        title = rest.strip()

    # Handle chapter numbers like "005.5" or "005-006"
    try:
        chapter = float(chapter_str)
    except ValueError:
        try:
            chapter = float(chapter_str.split('-')[0])  # Take first number for ranges
        except ValueError:
            chapter = 0  # Fallback for unparseable chapter numbers

    return {
        'manga_name': manga_name,
        'volume': volume,
        'chapter': chapter,
        'chapter_str': chapter_str,
        'title': title,
        'group': group,
        'filename': filename,
    }
|
|
|
|
def find_cbz_files(folder_path, recursive=False, extra_verbose=False):
    """Find all CBZ files in the given folder.

    Args:
        folder_path: Directory to search.
        recursive: When true, descend into sub-directories as well.
        extra_verbose: When true, print what is being searched and how many
            files were found.

    Returns:
        A sorted list of paths (``folder_path`` joined with the relative
        location) of regular files whose name ends in ``.cbz``, compared
        case-insensitively.
    """
    cbz_files = []

    if recursive:
        if extra_verbose:
            print(f"Recursively searching for CBZ files in {folder_path}")

        # os.walk already separates files from directories.
        for root, _, files in os.walk(folder_path):
            for name in files:
                if name.lower().endswith('.cbz'):
                    cbz_files.append(os.path.join(root, name))
    else:
        if extra_verbose:
            print(f"Searching for CBZ files in {folder_path} (non-recursive)")

        for name in os.listdir(folder_path):
            path = os.path.join(folder_path, name)
            # os.listdir also returns directories; one that happens to be
            # named "something.cbz" is not an archive and must be skipped.
            if name.lower().endswith('.cbz') and os.path.isfile(path):
                cbz_files.append(path)

    # Directory listing order is OS-dependent; sort for reproducible output.
    cbz_files.sort()

    if extra_verbose:
        print(f"Found {len(cbz_files)} CBZ files")

    return cbz_files
|
|
|
|
def fix_missing_files(chapter_infos, folder_path, extra_verbose=False):
    """Attempt to find missing files by searching for similar filenames.

    For every entry whose ``filename`` does not exist, the directory that the
    path points into is scanned for a ``.cbz`` file whose name carries the
    same volume and chapter markers (``v<volume>`` and ``c<chapter_str>``).
    The first such file, in sorted name order, is used as the replacement.

    Args:
        chapter_infos: Iterable of chapter dicts; the keys ``filename``,
            ``volume`` and ``chapter_str`` are read.
        folder_path: Accepted for interface compatibility; the directory
            searched is taken from each entry's own ``filename``.
        extra_verbose: When true, print progress of the search.

    Returns:
        A tuple ``(fixed_chapters, unresolved_chapters)``. ``fixed_chapters``
        holds the entries whose file exists, plus copies of repaired entries
        with ``filename`` pointing at the replacement. ``unresolved_chapters``
        holds the original entries for which nothing was found. Input dicts
        are never modified.
    """
    fixed_chapters = []
    unresolved_chapters = []

    for chapter_info in chapter_infos:
        if os.path.exists(chapter_info['filename']):
            fixed_chapters.append(chapter_info)
            continue

        # Get the problematic filename
        base_filename = os.path.basename(chapter_info['filename'])
        if extra_verbose:
            print(f"Trying to find replacement for: {base_filename}")

        directory = os.path.dirname(chapter_info['filename'])

        # Bounded on both sides so "v01" does not match "v010" and "c005"
        # does not match "c0050" or "c005.5". The volume may be zero-padded
        # to any width. The chapter text is escaped because it can contain
        # "." which would otherwise match any character.
        volume_re = re.compile(
            rf"(?<![A-Za-z0-9])v0*{int(chapter_info['volume'])}(?!\d)"
        )
        chapter_re = re.compile(
            rf"(?<![A-Za-z0-9])c{re.escape(chapter_info['chapter_str'])}(?!\d|[.-]\d)"
        )

        try:
            # An empty dirname means "current directory"; os.listdir('')
            # would raise. Sorting makes the chosen replacement deterministic.
            candidates = sorted(os.listdir(directory or os.curdir))
        except OSError as e:
            if extra_verbose:
                print(f" Error while searching for replacement: {str(e)}")
            candidates = []

        replacement = None
        for candidate in candidates:
            if not candidate.lower().endswith('.cbz'):
                continue

            # Check if volume and chapter match
            if not (volume_re.search(candidate) and chapter_re.search(candidate)):
                continue

            if extra_verbose:
                print(f" Found potential replacement: {candidate}")

            candidate_path = os.path.join(directory, candidate)
            # Guards against directories and dangling links named *.cbz.
            if os.path.isfile(candidate_path):
                if extra_verbose:
                    print(f" Replacement file exists, using it instead")
                replacement = candidate_path
                break

        if replacement is None:
            unresolved_chapters.append(chapter_info)
        else:
            # Create a new chapter info with the correct filename
            new_chapter_info = dict(chapter_info)
            new_chapter_info['filename'] = replacement
            fixed_chapters.append(new_chapter_info)

    return fixed_chapters, unresolved_chapters
|
|
|
|
def organize_by_volume(cbz_files, extra_verbose=False):
    """Bucket chapter files by series and volume number.

    Each path is handed to ``parse_manga_filename``. Parsed entries are
    stored under the lower-cased manga name and then under the volume
    number; every per-volume list is finally ordered by its ``chapter``
    value.

    Args:
        cbz_files: Iterable of CBZ file paths.
        extra_verbose: When true, list up to ten of the files that could not
            be parsed, followed by a count of the remainder.

    Returns:
        A nested ``defaultdict``: lower-cased manga name -> volume number ->
        list of parsed chapter dicts.
    """
    volumes = defaultdict(lambda: defaultdict(list))
    unparsed_files = []

    for path in cbz_files:
        parsed = parse_manga_filename(path)
        if not parsed:
            unparsed_files.append(path)
            continue
        series_key = parsed['manga_name'].lower()
        volumes[series_key][parsed['volume']].append(parsed)

    # Order the chapters inside every volume bucket.
    for per_volume in volumes.values():
        for chapter_list in per_volume.values():
            chapter_list.sort(key=lambda entry: entry['chapter'])

    if extra_verbose and unparsed_files:
        total_unparsed = len(unparsed_files)
        print(f"\nWARNING: Could not parse {total_unparsed} files:")
        # Cap the listing at ten names to keep the output readable.
        for path in unparsed_files[:10]:
            print(f" - {os.path.basename(path)}")
        if total_unparsed > 10:
            print(f" ... and {total_unparsed - 10} more")

    return volumes
|
|
|
|
def _chapter_tag(chapter_str):
    """Return ``chapter_str`` with its leading integer part padded to 3 digits."""
    # Padding the whole string would leave "5.5" untouched (it is already
    # three characters long) and make it sort after "010".
    leading = re.match(r'\d+', chapter_str)
    if not leading:
        return chapter_str.zfill(3)
    return leading.group(0).zfill(3) + chapter_str[leading.end():]


def _extract_chapter_pages(chapter_info, chapter_dir, extra_verbose=False):
    """Copy every entry of one chapter archive into ``chapter_dir`` under standardized names."""
    tag = _chapter_tag(chapter_info['chapter_str'])

    with zipfile.ZipFile(chapter_info['filename'], 'r') as zf:
        file_list = sorted(zf.namelist())

        if extra_verbose:
            print(f" Contains {len(file_list)} files:")

        for i, file_name in enumerate(file_list):
            if file_name.endswith('/'):  # Skip directories
                continue

            if extra_verbose and i < 10:  # Show first 10 files only
                print(f" - {file_name}")

            # Standardized naming pattern: chapterXXX_YYY.ext. Only the
            # extension of the archived name is kept, so entries cannot
            # escape chapter_dir through path components.
            _, ext = os.path.splitext(os.path.basename(file_name))
            new_name = f"chapter{tag}_{i+1:03d}{ext}"

            try:
                with zf.open(file_name) as source, open(os.path.join(chapter_dir, new_name), 'wb') as target:
                    shutil.copyfileobj(source, target)
            except Exception as file_error:
                # Best effort: one unreadable page does not abort the whole
                # volume, but it is always reported so the gap is visible.
                print(f" ERROR extracting {file_name}: {str(file_error)}")

        if extra_verbose and len(file_list) > 10:
            print(f" ... and {len(file_list) - 10} more files")


def _write_volume_archive(temp_dir, archive_path, extra_verbose=False):
    """Zip every chapter directory under ``temp_dir`` into ``archive_path`` in sorted order."""
    with zipfile.ZipFile(archive_path, 'w') as volume_zip:
        for chapter_dir in sorted(os.listdir(temp_dir)):
            chapter_path = os.path.join(temp_dir, chapter_dir)
            if not os.path.isdir(chapter_path):
                continue

            chapter_files = sorted(os.listdir(chapter_path))

            if extra_verbose:
                print(f"Adding directory: {chapter_dir} ({len(chapter_files)} files)")

            for index, file in enumerate(chapter_files):
                file_path = os.path.join(chapter_path, file)
                arc_name = os.path.join(chapter_dir, file)

                if extra_verbose and index < 5:
                    print(f" - Adding {arc_name}")

                volume_zip.write(file_path, arc_name)


def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, force=False, verbose=False, extra_verbose=False):
    """Combine multiple chapter CBZ files into a single volume CBZ.

    Each chapter is extracted into its own ``chapter_<NNN>`` directory inside
    a temporary folder, with its entries renamed to
    ``chapter<NNN>_<index>.<ext>``; the directories are then zipped, in
    sorted order, into ``"<manga_name> - Volume <NN>.cbz"``.

    Args:
        manga_name: Series name used in the output file name.
        volume_num: Integer volume number.
        chapter_infos: List of chapter dicts; ``filename`` and
            ``chapter_str`` are read here, and ``volume`` is needed if a
            missing file has to be located.
        output_dir: Directory for the volume file. It is created if needed.
            When omitted, the directory of the first chapter is used.
        force: Recreate the volume even if the output file already exists.
        verbose: Print progress information.
        extra_verbose: Print detailed debugging information.

    Returns:
        A ``(success, message)`` tuple. ``message`` is ``"Created"``,
        ``"Skipped (already exists)"``, or a description of the failure.
    """
    # Determine output path
    volume_filename = f"{manga_name} - Volume {volume_num:02d}.cbz"

    if not chapter_infos:
        # Nothing to combine, and no first chapter to derive a location from.
        return False, "No chapters to combine"

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, volume_filename)
    else:
        # Use the directory of the first chapter
        output_path = os.path.join(os.path.dirname(chapter_infos[0]['filename']), volume_filename)

    if verbose:
        print(f"Creating volume: {volume_filename}")
        print(f"Output path: {output_path}")

    # Check if volume already exists
    if os.path.exists(output_path) and not force:
        if verbose:
            print(f"Skipping {volume_filename} (already exists)")
        return True, "Skipped (already exists)"

    try:
        # Verify all chapter files exist before starting
        missing_files = [
            info['filename'] for info in chapter_infos
            if not os.path.exists(info['filename'])
        ]

        if missing_files:
            if extra_verbose:
                print(f"Found {len(missing_files)} missing files, attempting to fix:")
                for missing in missing_files:
                    print(f" - {os.path.basename(missing)}")

            # Try to fix missing files by finding alternatives
            source_dir = os.path.dirname(chapter_infos[0]['filename'])
            fixed_chapters, unresolved_chapters = fix_missing_files(chapter_infos, source_dir, extra_verbose)

            if unresolved_chapters:
                if extra_verbose:
                    print(f"Could not resolve {len(unresolved_chapters)} missing files:")
                    for chapter in unresolved_chapters:
                        print(f" - {os.path.basename(chapter['filename'])}")
                return False, f"Missing {len(unresolved_chapters)} chapter file(s) after resolution attempts"

            # Update chapter_infos with the fixed list
            chapter_infos = fixed_chapters

            if extra_verbose:
                print("All missing files resolved, proceeding with conversion")

        # Create a temporary directory for extraction
        with tempfile.TemporaryDirectory() as temp_dir:
            # Extract all chapters in order
            for chapter_info in tqdm(chapter_infos, desc=f"Extracting chapters for Volume {volume_num}", disable=not verbose):
                chapter_dir = os.path.join(temp_dir, f"chapter_{_chapter_tag(chapter_info['chapter_str'])}")
                os.makedirs(chapter_dir, exist_ok=True)

                if verbose:
                    print(f"Extracting chapter {chapter_info['chapter_str']}")

                if extra_verbose:
                    print(f"File: {chapter_info['filename']}")

                # A file that vanished since the check above fails the volume
                # regardless of verbosity, rather than producing an archive
                # that silently lacks a chapter.
                if not os.path.exists(chapter_info['filename']):
                    if extra_verbose:
                        print(f" ERROR: File does not exist!")
                    return False, f"Error extracting chapter {chapter_info['chapter_str']}: file does not exist"

                try:
                    _extract_chapter_pages(chapter_info, chapter_dir, extra_verbose)
                except Exception as e:
                    return False, f"Error extracting chapter {chapter_info['chapter_str']}: {str(e)}"

            # Create the volume CBZ
            if verbose:
                print(f"Creating volume CBZ: {volume_filename}")

            # Build the archive next to its destination and move it into
            # place only once complete, so an interrupted run never leaves a
            # truncated volume that later runs would skip as already existing.
            partial_path = output_path + ".part"
            try:
                _write_volume_archive(temp_dir, partial_path, extra_verbose)
                os.replace(partial_path, output_path)
            except Exception as e:
                try:
                    os.remove(partial_path)
                except OSError:
                    # Nothing was written, or it is already gone.
                    pass
                return False, f"Error creating volume ZIP: {str(e)}"

        return True, "Created"

    except Exception as e:
        # Callers expect a status tuple rather than an exception.
        return False, str(e)
|
|
|
|
def _manga_display_name(volumes, manga_key):
    """Return the original-case series name stored on the first chapter of the lowest volume."""
    try:
        first_volume = min(volumes[manga_key].keys())
        return volumes[manga_key][first_volume][0]['manga_name']
    except (ValueError, IndexError, KeyError):
        # Fallback if we can't get proper name
        return manga_key


def _print_chapter_listing(chapters):
    """Print one line per chapter plus whether its file currently exists."""
    for chapter in chapters:
        print(f" - Chapter {chapter['chapter_str']}: {os.path.basename(chapter['filename'])}")
        print(f" Exists: {os.path.exists(chapter['filename'])}")


def main():
    """Command-line entry point: combine chapter CBZ files into volume CBZ files.

    Parses the command line, collects the CBZ files in the given folder,
    groups them by series and volume, and builds one volume archive for every
    group that has at least ``--min-chapters`` chapters. A summary of
    created, skipped, ignored and failed volumes is printed at the end.

    Returns:
        Process exit status: ``1`` when the folder argument is not a
        directory or when at least one volume failed to build, ``0``
        otherwise (including when no CBZ files are found).
    """
    parser = argparse.ArgumentParser(description='Combine individual CBZ chapters into volume CBZ files')
    parser.add_argument('folder', help='Folder containing CBZ chapter files')
    parser.add_argument('-r', '--recursive', action='store_true', help='Search for CBZ files recursively')
    parser.add_argument('-o', '--output', help='Output folder for volume CBZ files (defaults to same location as chapters)')
    parser.add_argument('-f', '--force', action='store_true', help='Force creation even if volume CBZ already exists')
    parser.add_argument('-v', '--verbose', action='store_true', help='Show detailed progress')
    parser.add_argument('-vv', '--extra-verbose', action='store_true', help='Show extremely detailed debugging information')
    parser.add_argument('-m', '--min-chapters', type=int, default=2,
                        help='Minimum number of chapters required to create a volume (default: 2)')

    args = parser.parse_args()

    # If extra-verbose is enabled, automatically enable verbose too
    if args.extra_verbose:
        args.verbose = True

    if not os.path.isdir(args.folder):
        print(f"Error: '{args.folder}' is not a valid directory")
        return 1

    cbz_files = find_cbz_files(args.folder, args.recursive, args.extra_verbose)

    if not cbz_files:
        print(f"No CBZ files found in '{args.folder}'")
        return 0

    print(f"Found {len(cbz_files)} CBZ file(s)")

    # Organize files by manga and volume
    volumes = organize_by_volume(cbz_files, args.extra_verbose)

    total_manga = len(volumes)
    total_volumes = sum(len(volumes[manga]) for manga in volumes)

    print(f"Found {total_manga} manga series with {total_volumes} volume(s) to process")

    # Print detailed manga and volume information in extra verbose mode
    if args.extra_verbose:
        print("\nDetailed manga and volume breakdown:")
        for manga_name in volumes:
            print(f"\n{_manga_display_name(volumes, manga_name)}:")
            for volume_num in sorted(volumes[manga_name].keys()):
                chapters = volumes[manga_name][volume_num]
                print(f" Volume {volume_num}: {len(chapters)} chapters")
                _print_chapter_listing(chapters)

    success_count = 0
    skip_count = 0
    fail_count = 0
    ignored_count = 0

    # Process each manga and volume
    for manga_name in volumes:
        # The grouping key is lower-cased; recover the name as written.
        manga_display_name = _manga_display_name(volumes, manga_name)

        if args.verbose:
            print(f"\nProcessing manga: {manga_display_name}")

        for volume_num in sorted(volumes[manga_name].keys()):
            chapters = volumes[manga_name][volume_num]

            # Skip volumes with too few chapters
            if len(chapters) < args.min_chapters:
                if args.verbose:
                    print(f"Skipping Volume {volume_num} - only has {len(chapters)} chapter(s) (minimum is {args.min_chapters})")
                ignored_count += 1
                continue

            # List all chapters for debugging
            if args.extra_verbose:
                print(f"\nChapters for {manga_display_name} Volume {volume_num}:")
                _print_chapter_listing(chapters)

            success, message = create_volume_cbz(
                manga_display_name,
                volume_num,
                chapters,
                args.output,
                args.force,
                args.verbose,
                args.extra_verbose,
            )

            if not success:
                fail_count += 1
                print(f"Error creating Volume {volume_num} for {manga_display_name}: {message}")
            elif message == "Skipped (already exists)":
                skip_count += 1
            else:
                success_count += 1

    print(f"\nVolume creation complete:")
    print(f" - {success_count} volumes created successfully")
    print(f" - {skip_count} volumes skipped (already exist)")
    print(f" - {ignored_count} volumes ignored (too few chapters)")
    print(f" - {fail_count} volumes failed")

    # Report failures through the exit status so calling scripts can react.
    return 1 if fail_count else 0
|
|
|
|
if __name__ == '__main__':
    # Run the CLI only when executed as a script, and hand main()'s return
    # value to the interpreter as the process exit status.
    import sys

    exit_status = main()
    sys.exit(exit_status)