"""Combine individual manga chapter CBZ files into per-volume CBZ archives."""

import argparse
import os
import re
import shutil
import tempfile
import unicodedata
import zipfile
from collections import defaultdict

try:
    from tqdm import tqdm
except ImportError:  # tqdm only draws the progress bar; run without it if absent
    def tqdm(iterable, **_kwargs):
        """Stand-in for ``tqdm.tqdm`` that returns the iterable unchanged."""
        return iterable


# Message returned by create_volume_cbz() when the output already exists;
# main() compares against it to count skipped volumes.
SKIPPED_MESSAGE = "Skipped (already exists)"

# Characters that commonly show up in place of plain ASCII punctuation.
# NOTE(review): the original table had five identical '?' keys (so only the
# last entry was effective); the non-'?' entries below are a reconstruction of
# the presumably intended characters - confirm against real filenames.
_CHAR_TRANSLATION = str.maketrans({
    '?': "'",        # question mark left behind by a badly encoded apostrophe
    '\u2018': "'",   # left single quotation mark
    '\u2019': "'",   # right single quotation mark / typographic apostrophe
    '\u201c': '"',   # left double quotation mark
    '\u201d': '"',   # right double quotation mark
    '\u2013': '-',   # en dash
    '\u2014': '-',   # em dash
    '\u00a0': ' ',   # no-break space
})

# manga_name v## c### [optional title] [optional group] .cbz
# The extension is matched case-insensitively to agree with find_cbz_files().
_FILENAME_RE = re.compile(
    r'(.*?)\s+v(\d+)\s+c(\d+[.\d-]*)(?:\s+(.*?))?\s*(?i:\.cbz)$'
)

# Trailing "[group]" tag; brackets are excluded from the capture so that
# "Title [a] [b]" yields group "b" rather than "a] [b".
_GROUP_RE = re.compile(r'\[([^\[\]]*)\]\s*$')


def normalize_filename(filename):
    """Return *filename* with problematic punctuation replaced by ASCII.

    Only the characters listed in ``_CHAR_TRANSLATION`` are touched; the
    result is used for parsing, never for opening files.
    """
    return filename.translate(_CHAR_TRANSLATION)


def parse_manga_filename(filename):
    """Extract volume, chapter and title information from a manga filename.

    Expected layout: ``manga_name v## c### [optional title] [optional group].cbz``.

    Returns a dict with keys ``manga_name``, ``volume`` (int), ``chapter``
    (float used for sorting), ``chapter_str`` (chapter exactly as written),
    ``title``, ``group`` and ``filename`` (the path as passed in), or ``None``
    when the name does not match.  A warning is printed for existing files
    that cannot be parsed.
    """
    base_filename = os.path.basename(filename)
    normalized_filename = normalize_filename(base_filename)

    match = _FILENAME_RE.match(normalized_filename)
    if not match:
        if os.path.exists(filename):
            # For debugging: print the filename that couldn't be parsed
            print(f"WARNING: Could not parse filename: {base_filename}")
        return None

    manga_name = match.group(1).strip()
    volume = int(match.group(2))
    chapter_str = match.group(3)
    rest = match.group(4) or ""  # None when the file has no title/group part

    group_match = _GROUP_RE.search(rest)
    if group_match:
        group = group_match.group(1)
        # Slice at the tag itself so a name consisting only of "[Group]"
        # yields an empty title.
        title = rest[:group_match.start()].strip()
    else:
        group = ""
        title = rest.strip()

    # Handle chapter numbers like "005.5" or "005-006"
    try:
        chapter = float(chapter_str)
    except ValueError:
        try:
            chapter = float(chapter_str.split('-')[0])  # first number of a range
        except ValueError:
            chapter = 0  # Fallback for unparseable chapter numbers

    return {
        'manga_name': manga_name,
        'volume': volume,
        'chapter': chapter,
        'chapter_str': chapter_str,
        'title': title,
        'group': group,
        'filename': filename,
    }


def find_cbz_files(folder_path, recursive=False, extra_verbose=False):
    """Return a sorted list of paths to the CBZ files in *folder_path*.

    The extension check is case-insensitive.  With ``recursive=True`` the
    whole tree below *folder_path* is walked.
    """
    if recursive:
        if extra_verbose:
            print(f"Recursively searching for CBZ files in {folder_path}")
        cbz_files = [
            os.path.join(root, name)
            for root, _, names in os.walk(folder_path)
            for name in names
            if name.lower().endswith('.cbz')
        ]
    else:
        if extra_verbose:
            print(f"Searching for CBZ files in {folder_path} (non-recursive)")
        candidates = (
            os.path.join(folder_path, name)
            for name in os.listdir(folder_path)
            if name.lower().endswith('.cbz')
        )
        # Drop directories that merely end in ".cbz".  isdir() (rather than
        # isfile()) keeps entries that cannot be stat'ed, so they can still be
        # handled by fix_missing_files() later.
        cbz_files = [path for path in candidates if not os.path.isdir(path)]

    cbz_files.sort()  # deterministic processing order across filesystems

    if extra_verbose:
        print(f"Found {len(cbz_files)} CBZ files")
    return cbz_files


def fix_missing_files(chapter_infos, folder_path, extra_verbose=False):
    """Attempt to find missing files by searching for similar filenames.

    For every chapter whose ``filename`` does not exist, the chapter's own
    directory (falling back to *folder_path*) is scanned for a CBZ file that
    carries the same volume number and exactly the same chapter string.

    Returns ``(fixed_chapters, unresolved_chapters)``.  Entries in
    ``fixed_chapters`` are either the original dicts or copies with a
    corrected ``filename``.
    """
    fixed_chapters = []
    unresolved_chapters = []

    for chapter_info in chapter_infos:
        if os.path.exists(chapter_info['filename']):
            fixed_chapters.append(chapter_info)
            continue

        base_filename = os.path.basename(chapter_info['filename'])
        if extra_verbose:
            print(f"Trying to find replacement for: {base_filename}")

        directory = os.path.dirname(chapter_info['filename']) or folder_path or os.curdir

        # Boundaries keep v01 from matching v010 and c005 from matching
        # c0055, c005.5 or c005-006; the chapter string is escaped because it
        # may contain '.'.
        volume_re = re.compile(
            rf"(?<![A-Za-z0-9])v0*{int(chapter_info['volume'])}(?!\d)"
        )
        chapter_re = re.compile(
            rf"(?<![A-Za-z0-9])c{re.escape(chapter_info['chapter_str'])}(?![\d-]|\.\d)"
        )

        try:
            candidates = sorted(os.listdir(directory))
        except OSError as e:
            if extra_verbose:
                print(f" Error while searching for replacement: {str(e)}")
            candidates = []

        found_replacement = False
        for candidate in candidates:
            if not candidate.lower().endswith('.cbz'):
                continue
            normalized = normalize_filename(candidate)
            if not (volume_re.search(normalized) and chapter_re.search(normalized)):
                continue

            if extra_verbose:
                print(f" Found potential replacement: {candidate}")
            replacement_path = os.path.join(directory, candidate)
            if os.path.exists(replacement_path):
                if extra_verbose:
                    print(" Replacement file exists, using it instead")
                new_chapter_info = dict(chapter_info)
                new_chapter_info['filename'] = replacement_path
                fixed_chapters.append(new_chapter_info)
                found_replacement = True
                break

        if not found_replacement:
            unresolved_chapters.append(chapter_info)

    return fixed_chapters, unresolved_chapters


def organize_by_volume(cbz_files, extra_verbose=False):
    """Group CBZ files by manga name and volume.

    Returns ``volumes[manga_key][volume_number] -> list of chapter dicts``
    (as produced by parse_manga_filename), where ``manga_key`` is the
    lower-cased series name and each list is sorted by chapter number.
    Files that cannot be parsed are left out (and listed in extra-verbose
    mode).
    """
    volumes = defaultdict(lambda: defaultdict(list))
    unparsed_files = []

    for cbz_file in cbz_files:
        info = parse_manga_filename(cbz_file)
        if info:
            volumes[info['manga_name'].lower()][info['volume']].append(info)
        else:
            unparsed_files.append(cbz_file)

    # Sort chapters within each volume
    for manga_volumes in volumes.values():
        for chapters in manga_volumes.values():
            chapters.sort(key=lambda x: x['chapter'])

    if extra_verbose and unparsed_files:
        print(f"\nWARNING: Could not parse {len(unparsed_files)} files:")
        for file in unparsed_files[:10]:  # Show first 10 only to avoid spam
            print(f" - {os.path.basename(file)}")
        if len(unparsed_files) > 10:
            print(f" ... and {len(unparsed_files) - 10} more")

    return volumes


def _natural_key(name):
    """Sort key that orders embedded numbers numerically ("2" before "10")."""
    # re.split with a capturing group alternates text, digits, text, ...
    parts = re.split(r'(\d+)', name)
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]


def _chapter_label(chapter_str):
    """Zero-pad the leading integer of a chapter string to three digits.

    "5" -> "005", "5.5" -> "005.5", "005-006" -> "005-006".
    """
    digits = re.match(r'\d+', chapter_str)
    if not digits:
        return chapter_str.zfill(3)
    return digits.group(0).zfill(3) + chapter_str[digits.end():]


def _extract_chapter(chapter_info, chapter_dir, label, extra_verbose):
    """Extract the pages of one chapter archive into *chapter_dir*.

    Pages are renamed to ``chapter<label>_<NNN><ext>`` following the natural
    sort order of their names inside the archive.  Raises whatever
    ``zipfile.ZipFile`` raises when the archive itself cannot be opened.
    """
    with zipfile.ZipFile(chapter_info['filename'], 'r') as zf:
        file_list = sorted(zf.namelist(), key=lambda n: (_natural_key(n), n))
        if extra_verbose:
            print(f" Contains {len(file_list)} files:")

        for i, file_name in enumerate(file_list):
            if file_name.endswith('/'):  # Skip directories
                continue
            if extra_verbose and i < 10:  # Show first 10 files only
                print(f" - {file_name}")

            # Standardized naming pattern: chapterXXX_pageYYY.ext
            ext = os.path.splitext(os.path.basename(file_name))[1]
            target_path = os.path.join(chapter_dir, f"chapter{label}_{i + 1:03d}{ext}")

            try:
                with zf.open(file_name) as source, open(target_path, 'wb') as target:
                    shutil.copyfileobj(source, target)
            except Exception as file_error:
                # Deliberately best-effort: one bad page must not abort the
                # volume, but say so and do not ship a truncated page.
                print(f" ERROR extracting {file_name}: {str(file_error)}")
                try:
                    os.remove(target_path)
                except OSError:
                    pass

        if extra_verbose and len(file_list) > 10:
            print(f" ... and {len(file_list) - 10} more files")


def _write_volume(output_path, temp_dir, chapter_dirs, extra_verbose):
    """Zip the extracted chapter directories, in order, into *output_path*.

    The archive is written to ``<output_path>.part`` and moved into place only
    when complete, so a failed run never leaves a corrupt volume that later
    runs would skip as "already exists".
    """
    partial_path = output_path + '.part'
    try:
        with zipfile.ZipFile(partial_path, 'w') as volume_zip:
            for chapter_dir in chapter_dirs:
                chapter_path = os.path.join(temp_dir, chapter_dir)
                chapter_files = sorted(os.listdir(chapter_path))
                if extra_verbose:
                    print(f"Adding directory: {chapter_dir} ({len(chapter_files)} files)")
                for index, file in enumerate(chapter_files):
                    arc_name = os.path.join(chapter_dir, file)
                    if extra_verbose and index < 5:
                        print(f" - Adding {arc_name}")
                    volume_zip.write(os.path.join(chapter_path, file), arc_name)
        os.replace(partial_path, output_path)
    finally:
        # Only present if something above failed before the rename.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                pass


def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, force=False,
                      verbose=False, extra_verbose=False):
    """Combine multiple chapter CBZ files into a single volume CBZ.

    Args:
        manga_name: Series name used in the output filename.
        volume_num: Integer volume number.
        chapter_infos: Chapter dicts as produced by parse_manga_filename().
        output_dir: Destination folder; defaults to the folder of the first
            chapter.
        force: Overwrite an existing volume file.
        verbose: Print progress.
        extra_verbose: Print per-file debugging output.

    Returns:
        ``(success, message)``.  ``message`` is ``"Created"``,
        ``SKIPPED_MESSAGE`` or a description of the failure.
    """
    if not chapter_infos:
        return False, "No chapter files supplied"

    # Determine output path
    volume_filename = f"{manga_name} - Volume {volume_num:02d}.cbz"
    if output_dir:
        try:
            os.makedirs(output_dir, exist_ok=True)
        except OSError as e:
            return False, f"Cannot create output directory: {str(e)}"
        output_path = os.path.join(output_dir, volume_filename)
    else:
        # Use the directory of the first chapter
        output_path = os.path.join(os.path.dirname(chapter_infos[0]['filename']), volume_filename)

    if verbose:
        print(f"Creating volume: {volume_filename}")
        print(f"Output path: {output_path}")

    # Check if volume already exists
    if os.path.exists(output_path) and not force:
        if verbose:
            print(f"Skipping {volume_filename} (already exists)")
        return True, SKIPPED_MESSAGE

    try:
        # Verify all chapter files exist before starting
        missing_files = [
            info['filename'] for info in chapter_infos
            if not os.path.exists(info['filename'])
        ]
        if missing_files:
            if extra_verbose:
                print(f"Found {len(missing_files)} missing files, attempting to fix:")
                for missing in missing_files:
                    print(f" - {os.path.basename(missing)}")

            chapter_dir = os.path.dirname(chapter_infos[0]['filename'])
            fixed_chapters, unresolved_chapters = fix_missing_files(
                chapter_infos, chapter_dir, extra_verbose
            )
            if unresolved_chapters:
                if extra_verbose:
                    print(f"Could not resolve {len(unresolved_chapters)} missing files:")
                    for chapter in unresolved_chapters:
                        print(f" - {os.path.basename(chapter['filename'])}")
                return False, (
                    f"Missing {len(unresolved_chapters)} chapter file(s) after resolution attempts"
                )

            chapter_infos = fixed_chapters
            if extra_verbose:
                print("All missing files resolved, proceeding with conversion")

        # Stable sort: a no-op for callers that already pass sorted chapters,
        # and guarantees numeric chapter order in the archive otherwise.
        chapter_infos = sorted(chapter_infos, key=lambda info: info.get('chapter', 0))

        with tempfile.TemporaryDirectory() as temp_dir:
            chapter_dirs = []  # directory names in the order they must be archived
            used_names = set()

            for chapter_info in tqdm(chapter_infos,
                                     desc=f"Extracting chapters for Volume {volume_num}",
                                     disable=not verbose):
                if verbose:
                    print(f"Extracting chapter {chapter_info['chapter_str']}")
                if extra_verbose:
                    print(f"File: {chapter_info['filename']}")
                if not os.path.exists(chapter_info['filename']):
                    print(" ERROR: File does not exist!")
                    continue

                label = _chapter_label(chapter_info['chapter_str'])
                dir_name = f"chapter_{label}"
                # Two files can carry the same chapter number (e.g. releases by
                # different groups); give each its own directory so their
                # pages do not overwrite one another.
                duplicate = 2
                while dir_name in used_names:
                    dir_name = f"chapter_{label}({duplicate})"
                    duplicate += 1
                used_names.add(dir_name)

                chapter_dir = os.path.join(temp_dir, dir_name)
                os.makedirs(chapter_dir, exist_ok=True)
                chapter_dirs.append(dir_name)

                try:
                    _extract_chapter(chapter_info, chapter_dir, label, extra_verbose)
                except Exception as e:
                    return False, (
                        f"Error extracting chapter {chapter_info['chapter_str']}: {str(e)}"
                    )

            # Create the volume CBZ
            if verbose:
                print(f"Creating volume CBZ: {volume_filename}")
            try:
                _write_volume(output_path, temp_dir, chapter_dirs, extra_verbose)
            except Exception as e:
                return False, f"Error creating volume ZIP: {str(e)}"

        return True, "Created"
    except Exception as e:
        # Per-volume boundary: report the failure and let main() carry on
        # with the remaining volumes.
        return False, str(e)


def _display_name(manga_key, manga_volumes):
    """Return the series name as written on the first chapter of the lowest volume."""
    try:
        first_volume = min(manga_volumes)
        return manga_volumes[first_volume][0]['manga_name']
    except (ValueError, IndexError, KeyError):
        return manga_key  # Fallback if we can't get a proper name


def _print_chapters(chapters):
    """Print one line per chapter plus whether its file exists (debug aid)."""
    for chapter in chapters:
        print(f" - Chapter {chapter['chapter_str']}: {os.path.basename(chapter['filename'])}")
        print(f" Exists: {os.path.exists(chapter['filename'])}")


def main():
    """Command-line entry point.

    Returns the process exit status: 0 on success (including "nothing to
    do"), 1 when the folder is invalid or at least one volume failed.
    """
    parser = argparse.ArgumentParser(
        description='Combine individual CBZ chapters into volume CBZ files')
    parser.add_argument('folder', help='Folder containing CBZ chapter files')
    parser.add_argument('-r', '--recursive', action='store_true',
                        help='Search for CBZ files recursively')
    parser.add_argument('-o', '--output',
                        help='Output folder for volume CBZ files (defaults to same location as chapters)')
    parser.add_argument('-f', '--force', action='store_true',
                        help='Force creation even if volume CBZ already exists')
    parser.add_argument('-v', '--verbose', action='store_true', help='Show detailed progress')
    parser.add_argument('-vv', '--extra-verbose', action='store_true',
                        help='Show extremely detailed debugging information')
    parser.add_argument('-m', '--min-chapters', type=int, default=2,
                        help='Minimum number of chapters required to create a volume (default: 2)')
    args = parser.parse_args()

    # If extra-verbose is enabled, automatically enable verbose too
    if args.extra_verbose:
        args.verbose = True

    if not os.path.isdir(args.folder):
        print(f"Error: '{args.folder}' is not a valid directory")
        return 1

    cbz_files = find_cbz_files(args.folder, args.recursive, args.extra_verbose)
    if not cbz_files:
        print(f"No CBZ files found in '{args.folder}'")
        return 0
    print(f"Found {len(cbz_files)} CBZ file(s)")

    # Organize files by manga and volume
    volumes = organize_by_volume(cbz_files, args.extra_verbose)
    total_manga = len(volumes)
    total_volumes = sum(len(manga_volumes) for manga_volumes in volumes.values())
    print(f"Found {total_manga} manga series with {total_volumes} volume(s) to process")

    # Print detailed manga and volume information in extra verbose mode
    if args.extra_verbose:
        print("\nDetailed manga and volume breakdown:")
        for manga_key, manga_volumes in volumes.items():
            print(f"\n{_display_name(manga_key, manga_volumes)}:")
            for volume_num in sorted(manga_volumes):
                chapters = manga_volumes[volume_num]
                print(f" Volume {volume_num}: {len(chapters)} chapters")
                _print_chapters(chapters)

    success_count = 0
    skip_count = 0
    fail_count = 0
    ignored_count = 0

    # Process each manga and volume
    for manga_key, manga_volumes in volumes.items():
        manga_display_name = _display_name(manga_key, manga_volumes)
        if args.verbose:
            print(f"\nProcessing manga: {manga_display_name}")

        for volume_num in sorted(manga_volumes):
            chapters = manga_volumes[volume_num]

            # Skip volumes with too few chapters
            if len(chapters) < args.min_chapters:
                if args.verbose:
                    print(f"Skipping Volume {volume_num} - only has {len(chapters)} chapter(s) "
                          f"(minimum is {args.min_chapters})")
                ignored_count += 1
                continue

            # List all chapters for debugging
            if args.extra_verbose:
                print(f"\nChapters for {manga_display_name} Volume {volume_num}:")
                _print_chapters(chapters)

            success, message = create_volume_cbz(
                manga_display_name, volume_num, chapters,
                args.output, args.force, args.verbose, args.extra_verbose
            )

            if not success:
                fail_count += 1
                print(f"Error creating Volume {volume_num} for {manga_display_name}: {message}")
            elif message == SKIPPED_MESSAGE:
                skip_count += 1
            else:
                success_count += 1

    print("\nVolume creation complete:")
    print(f" - {success_count} volumes created successfully")
    print(f" - {skip_count} volumes skipped (already exist)")
    print(f" - {ignored_count} volumes ignored (too few chapters)")
    print(f" - {fail_count} volumes failed")

    # Let calling scripts detect partial failure through the exit status.
    return 1 if fail_count else 0


if __name__ == '__main__':
    import sys
    sys.exit(main())