# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/core.py
import os
import re
from collections import defaultdict

from .parsing import parse_manga_filename
from .file_utils import has_problematic_characters, find_file_by_volume_chapter

# Title used when no directory name is available to derive one from.
_UNKNOWN_MANGA = "Unknown Manga"

# How many unparsed filenames are echoed in verbose mode before truncating.
_MAX_UNPARSED_SHOWN = 10

# "Vol XX - YYY" (volume, then chapter) and the same prefix followed by
# " - Title.cbz"; both are matched case-insensitively.
_ALT_VOL_CHAPTER_RE = re.compile(r'Vol\s+(\d+)\s+-\s+(\d+(?:\.\d+)?)', re.IGNORECASE)
_ALT_TITLE_RE = re.compile(
    r'Vol\s+\d+\s+-\s+\d+(?:\.\d+)?\s+-\s+(.*?)\.cbz', re.IGNORECASE
)

# Lenient "v##" / "c###" markers used for filenames flagged as problematic.
_LENIENT_VOL_RE = re.compile(r'v(\d+)')
_LENIENT_CHAPTER_RE = re.compile(r'c(\d+(?:\.\d+)?)')


def _build_info(manga_name, volume, chapter_str, title, filename):
    """Build an info dict for a file parsed by one of the fallback parsers."""
    return {
        'manga_name': manga_name,
        'volume': volume,
        # chapter_str always comes from a `\d+(?:\.\d+)?` regex group, so
        # float() cannot raise here.
        'chapter': float(chapter_str),
        'chapter_str': chapter_str,
        'title': title,
        'group': '',
        'filename': filename,
    }


def _parse_vol_dash_chapter(cbz_file, default_manga_name, extra_verbose):
    """Parse a "Vol XX - YYY - Title.cbz" style name; return None if no match."""
    base_filename = os.path.basename(cbz_file)
    match = _ALT_VOL_CHAPTER_RE.search(base_filename)
    if not match:
        return None

    if extra_verbose:
        print(f"Using alternative parsing for: {base_filename}")

    vol_num = int(match.group(1))
    chap_str = match.group(2)

    # The title is whatever sits between the chapter number and ".cbz".
    title_match = _ALT_TITLE_RE.search(base_filename)
    title = title_match.group(1) if title_match else ""

    if extra_verbose:
        print(f" Extracted: manga={default_manga_name}, vol={vol_num}, chap={chap_str}, title={title}")

    return _build_info(default_manga_name, vol_num, chap_str, title, cbz_file)


def _parse_lenient(cbz_file, default_manga_name, extra_verbose):
    """Parse loose "v##" and "c###" markers; return None unless both are found."""
    base_filename = os.path.basename(cbz_file)
    if extra_verbose:
        print(f"Attempting alternative parsing for problematic file: {base_filename}")

    vol_match = _LENIENT_VOL_RE.search(base_filename)
    chap_match = _LENIENT_CHAPTER_RE.search(base_filename)
    if not (vol_match and chap_match):
        return None

    # The manga name is everything before the volume marker. If the filename
    # starts with the marker, fall back to the directory-derived title rather
    # than grouping the file under an empty name.
    manga_name = base_filename[:vol_match.start()].strip() or default_manga_name
    vol_num = int(vol_match.group(1))
    chap_str = chap_match.group(1)

    if extra_verbose:
        print(f" Extracted: manga={manga_name}, vol={vol_num}, chap={chap_str}")

    return _build_info(manga_name, vol_num, chap_str, '', cbz_file)


def organize_by_volume(cbz_files, extra_verbose=False):
    """Group CBZ files by manga name and volume.

    Each file is first handed to ``parse_manga_filename``. Files it cannot
    parse are retried with a "Vol XX - YYY - Title.cbz" pattern, and then,
    only for files flagged by ``has_problematic_characters``, with a lenient
    "v##" / "c###" search. Files that match none of these are left out of the
    result and, in verbose mode, reported.

    Args:
        cbz_files: Sequence of CBZ file paths. The directory name of the
            first entry is used as the manga title for files handled by the
            "Vol XX - YYY" fallback.
        extra_verbose: When true, print diagnostics about problematic,
            alternatively parsed and unparsed files.

    Returns:
        A nested ``defaultdict``: lower-cased manga name -> volume -> list of
        info dicts, each list sorted by the dicts' ``'chapter'`` value.
    """
    volumes = defaultdict(lambda: defaultdict(list))
    unparsed_files = []

    # Flag problematic filenames once; the result drives both the verbose
    # warning and the lenient fallback below.
    problematic_files = [
        cbz_file for cbz_file in cbz_files
        if has_problematic_characters(cbz_file)[0]
    ]
    if problematic_files and extra_verbose:
        print(f"\nWARNING: Found {len(problematic_files)} problematic filenames that might need special handling.")
    problematic_lookup = set(problematic_files)

    # Derive the default manga title from the first file's directory name.
    # A bare filename has no directory component, which would yield an empty
    # title, so fall back to the generic one in that case.
    if cbz_files:
        directory_name = os.path.basename(os.path.dirname(cbz_files[0]))
        default_manga_name = directory_name or _UNKNOWN_MANGA
        if extra_verbose:
            print(f"Using directory name as manga title: {default_manga_name}")
    else:
        default_manga_name = _UNKNOWN_MANGA

    for cbz_file in cbz_files:
        info = parse_manga_filename(cbz_file)
        if not info:
            info = _parse_vol_dash_chapter(cbz_file, default_manga_name, extra_verbose)
        if not info and cbz_file in problematic_lookup:
            info = _parse_lenient(cbz_file, default_manga_name, extra_verbose)
        if not info:
            unparsed_files.append(cbz_file)
            continue

        manga_key = info['manga_name'].lower()
        volumes[manga_key][info['volume']].append(info)

    # Sort chapters within each volume.
    for per_volume in volumes.values():
        for chapters in per_volume.values():
            chapters.sort(key=lambda entry: entry['chapter'])

    if extra_verbose and unparsed_files:
        print(f"\nWARNING: Could not parse {len(unparsed_files)} files:")
        # Show only the first few to avoid flooding the output.
        for unparsed in unparsed_files[:_MAX_UNPARSED_SHOWN]:
            print(f" - {os.path.basename(unparsed)}")
        if len(unparsed_files) > _MAX_UNPARSED_SHOWN:
            print(f" ... and {len(unparsed_files) - _MAX_UNPARSED_SHOWN} more")

    return volumes