# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/core.py
import os
import re
from collections import defaultdict

from .parsing import parse_manga_filename
from .file_utils import has_problematic_characters, find_file_by_volume_chapter

# Lenient fallback patterns, used only when parse_manga_filename() rejects a
# file whose name contains problematic characters.
# NOTE: both patterns match the first occurrence anywhere in the base name,
# so a 'v<digits>' or 'c<digits>' inside the title itself will be picked up.
_VOLUME_PATTERN = re.compile(r'v(\d+)')
_CHAPTER_PATTERN = re.compile(r'c(\d+(?:\.\d+)?)')

# Upper bound on how many unparsed filenames are listed in verbose output.
_MAX_UNPARSED_SHOWN = 10


def _parse_problematic_filename(cbz_file, extra_verbose=False):
    """Extract basic volume/chapter info from a filename with a lenient pattern.

    Args:
        cbz_file: Path of the CBZ file; only its base name is inspected.
        extra_verbose: When true, print the values that were extracted.

    Returns:
        An info dict with the keys 'manga_name', 'volume', 'chapter',
        'chapter_str', 'title', 'group' and 'filename', or None when the base
        name does not contain both a ``v<digits>`` and a ``c<digits>`` marker.
    """
    base_filename = os.path.basename(cbz_file)

    vol_match = _VOLUME_PATTERN.search(base_filename)
    chap_match = _CHAPTER_PATTERN.search(base_filename)
    if not (vol_match and chap_match):
        return None

    # The manga name is everything before the volume marker.
    manga_name = base_filename[:vol_match.start()].strip()
    vol_num = int(vol_match.group(1))
    chap_str = chap_match.group(1)
    try:
        chap_num = float(chap_str)
    except ValueError:
        # Defensive only: the chapter pattern should always yield a valid float.
        chap_num = 0

    if extra_verbose:
        print(f" Extracted: manga={manga_name}, vol={vol_num}, chap={chap_str}")

    return {
        'manga_name': manga_name,
        'volume': vol_num,
        'chapter': chap_num,
        'chapter_str': chap_str,
        'title': '',
        'group': '',
        'filename': cbz_file,
    }


def organize_by_volume(cbz_files, extra_verbose=False):
    """Group CBZ files by manga name and volume.

    Each file is first parsed with parse_manga_filename(). Files it rejects
    and that has_problematic_characters() flags get a second chance through a
    lenient ``v<digits>`` / ``c<digits>`` pattern match on the base name.

    Args:
        cbz_files: Iterable of CBZ file paths. It is materialised once, so
            one-shot iterables such as generators are supported.
        extra_verbose: When true, print warnings about problematic and
            unparsed filenames as well as fallback-parsing details.

    Returns:
        A nested defaultdict mapping the lower-cased manga name to a mapping of
        volume to a list of info dicts, each list sorted by its 'chapter' value.
    """
    files = list(cbz_files)

    # Evaluate the problematic-character check once per file and reuse the
    # result for both the summary warning and the fallback decision.
    problem_flags = [bool(has_problematic_characters(path)[0]) for path in files]

    problem_count = sum(problem_flags)
    if problem_count and extra_verbose:
        print(f"\nWARNING: Found {problem_count} problematic filenames that might need special handling.")

    volumes = defaultdict(lambda: defaultdict(list))
    unparsed_files = []

    for cbz_file, has_problem in zip(files, problem_flags):
        info = parse_manga_filename(cbz_file)

        if not info and has_problem:
            # Regular parsing failed; try the lenient pattern for files whose
            # names contain problematic characters.
            if extra_verbose:
                print(f"Attempting alternative parsing for problematic file: {os.path.basename(cbz_file)}")
            info = _parse_problematic_filename(cbz_file, extra_verbose)

        if not info:
            unparsed_files.append(cbz_file)
            continue

        manga_key = info['manga_name'].lower()
        volumes[manga_key][info['volume']].append(info)

    # Sort chapters within each volume.
    for manga_volumes in volumes.values():
        for chapters in manga_volumes.values():
            chapters.sort(key=lambda entry: entry['chapter'])

    if extra_verbose and unparsed_files:
        print(f"\nWARNING: Could not parse {len(unparsed_files)} files:")
        # Show only the first few to avoid spamming the console.
        for path in unparsed_files[:_MAX_UNPARSED_SHOWN]:
            print(f" - {os.path.basename(path)}")
        if len(unparsed_files) > _MAX_UNPARSED_SHOWN:
            print(f" ... and {len(unparsed_files) - _MAX_UNPARSED_SHOWN} more")

    return volumes