diff --git a/cbz_volume_combiner/core.py b/cbz_volume_combiner/core.py index d4ecb95..c33a265 100644 --- a/cbz_volume_combiner/core.py +++ b/cbz_volume_combiner/core.py @@ -1,5 +1,5 @@ # File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/core.py -import os +import os, re from collections import defaultdict from .parsing import parse_manga_filename from .file_utils import has_problematic_characters, find_file_by_volume_chapter @@ -19,58 +19,103 @@ def organize_by_volume(cbz_files, extra_verbose=False): if problematic_files and extra_verbose: print(f"\nWARNING: Found {len(problematic_files)} problematic filenames that might need special handling.") + # Extract manga name from the first file's directory name + if cbz_files: + default_manga_name = os.path.basename(os.path.dirname(cbz_files[0])) + if extra_verbose: + print(f"Using directory name as manga title: {default_manga_name}") + else: + default_manga_name = "Unknown Manga" + for cbz_file in cbz_files: info = parse_manga_filename(cbz_file) if info: manga_key = info['manga_name'].lower() volumes[manga_key][info['volume']].append(info) else: - unparsed_files.append(cbz_file) + # Try alternative parsing for ALL unparsed files + base_filename = os.path.basename(cbz_file) - # For unparsed files that have problematic characters, - # try to get volume and chapter from filename pattern directly - has_problem, _ = has_problematic_characters(cbz_file) - if has_problem: + # Pattern for "Vol XX - YYY - Title.cbz" + alt_pattern = re.search(r'Vol\s+(\d+)\s+-\s+(\d+(?:\.\d+)?)', base_filename, re.IGNORECASE) + + if alt_pattern: if extra_verbose: - print(f"Attempting alternative parsing for problematic file: {os.path.basename(cbz_file)}") + print(f"Using alternative parsing for: {base_filename}") - # Extract basic info using more lenient pattern - base_filename = os.path.basename(cbz_file) - # Look for v## and c### patterns - vol_match = re.search(r'v(\d+)', base_filename) - chap_match = re.search(r'c(\d+(?:\.\d+)?)', base_filename) + vol_num = int(alt_pattern.group(1)) + chap_str = alt_pattern.group(2) - if vol_match and chap_match: - # Extract manga name (everything before v##) - vol_pos = base_filename.find(f"v{vol_match.group(1)}") - manga_name = base_filename[:vol_pos].strip() - - # Create a basic info dict - vol_num = int(vol_match.group(1)) - chap_str = chap_match.group(1) - - try: - chap_num = float(chap_str) - except ValueError: - chap_num = 0 - + try: + chap_num = float(chap_str) + except ValueError: + chap_num = 0 + + # Create a title from everything after the chapter number + title_match = re.search(r'Vol\s+\d+\s+-\s+\d+(?:\.\d+)?\s+-\s+(.*?)\.cbz', base_filename, re.IGNORECASE) + title = title_match.group(1) if title_match else "" + + if extra_verbose: + print(f" Extracted: manga={default_manga_name}, vol={vol_num}, chap={chap_str}, title={title}") + + info = { + 'manga_name': default_manga_name, + 'volume': vol_num, + 'chapter': chap_num, + 'chapter_str': chap_str, + 'title': title, + 'group': '', + 'filename': cbz_file + } + + manga_key = default_manga_name.lower() + volumes[manga_key][vol_num].append(info) + else: + # Fallback to the existing problematic file handling logic + has_problem, _ = has_problematic_characters(cbz_file) + if has_problem: if extra_verbose: - print(f" Extracted: manga={manga_name}, vol={vol_num}, chap={chap_str}") + print(f"Attempting alternative parsing for problematic file: {os.path.basename(cbz_file)}") - info = { - 'manga_name': manga_name, - 'volume': vol_num, - 'chapter': chap_num, - 'chapter_str': chap_str, - 'title': '', - 'group': '', - 'filename': cbz_file - } + # Extract basic info using more lenient pattern + base_filename = os.path.basename(cbz_file) + # Look for v## and c### patterns + vol_match = re.search(r'v(\d+)', base_filename) + chap_match = re.search(r'c(\d+(?:\.\d+)?)', base_filename) - manga_key = manga_name.lower() - volumes[manga_key][vol_num].append(info) - # Remove from unparsed files since we handled it - unparsed_files.remove(cbz_file) + if vol_match and chap_match: + # Extract manga name (everything before v##) + vol_pos = base_filename.find(f"v{vol_match.group(1)}") + manga_name = base_filename[:vol_pos].strip() + + # Create a basic info dict + vol_num = int(vol_match.group(1)) + chap_str = chap_match.group(1) + + try: + chap_num = float(chap_str) + except ValueError: + chap_num = 0 + + if extra_verbose: + print(f" Extracted: manga={manga_name}, vol={vol_num}, chap={chap_str}") + + info = { + 'manga_name': manga_name, + 'volume': vol_num, + 'chapter': chap_num, + 'chapter_str': chap_str, + 'title': '', + 'group': '', + 'filename': cbz_file + } + + manga_key = manga_name.lower() + volumes[manga_key][vol_num].append(info) + else: + unparsed_files.append(cbz_file) + else: + unparsed_files.append(cbz_file) # Sort chapters within each volume for manga in volumes: