Files
2025-03-22 23:17:37 +00:00

132 lines
5.7 KiB
Python

# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/core.py
import os, re
from collections import defaultdict
from .parsing import parse_manga_filename
from .file_utils import has_problematic_characters, find_file_by_volume_chapter
# Matches the "Vol XX - YYY" prefix of names such as "Vol 03 - 021 - Title.cbz".
_VOL_DASH_CHAPTER_RE = re.compile(
    r'Vol\s+(\d+)\s+-\s+(\d+(?:\.\d+)?)', re.IGNORECASE
)
# Same prefix, additionally capturing the title that precedes ".cbz".
_VOL_DASH_TITLE_RE = re.compile(
    r'Vol\s+\d+\s+-\s+\d+(?:\.\d+)?\s+-\s+(.*?)\.cbz', re.IGNORECASE
)
# Lenient "v##" / "c###" markers used for filenames flagged as problematic.
_LENIENT_VOLUME_RE = re.compile(r'v(\d+)')
_LENIENT_CHAPTER_RE = re.compile(r'c(\d+(?:\.\d+)?)')

# Title used when no directory name is available to fall back on.
_UNKNOWN_MANGA_NAME = "Unknown Manga"
# Cap on how many unparsed filenames are listed in verbose output.
_MAX_UNPARSED_SHOWN = 10


def _chapter_number(chap_str):
    """Return *chap_str* as a float, or 0 if it cannot be converted."""
    try:
        return float(chap_str)
    except ValueError:
        return 0


def _build_info(manga_name, vol_num, chap_str, title, cbz_file):
    """Build the info dict used for files handled by the fallback parsers."""
    return {
        'manga_name': manga_name,
        'volume': vol_num,
        'chapter': _chapter_number(chap_str),
        'chapter_str': chap_str,
        'title': title,
        'group': '',
        'filename': cbz_file,
    }


def _parse_vol_dash_chapter(cbz_file, default_manga_name, extra_verbose):
    """Parse a "Vol XX - YYY - Title.cbz" style name; return None if no match."""
    base_filename = os.path.basename(cbz_file)
    prefix_match = _VOL_DASH_CHAPTER_RE.search(base_filename)
    if not prefix_match:
        return None

    if extra_verbose:
        print(f"Using alternative parsing for: {base_filename}")

    vol_num = int(prefix_match.group(1))
    chap_str = prefix_match.group(2)

    # The title is optional: "Vol 01 - 003.cbz" still yields volume and chapter.
    title_match = _VOL_DASH_TITLE_RE.search(base_filename)
    title = title_match.group(1) if title_match else ""

    if extra_verbose:
        print(f" Extracted: manga={default_manga_name}, vol={vol_num}, chap={chap_str}, title={title}")

    # This filename pattern carries no manga name, so the caller's default is used.
    return _build_info(default_manga_name, vol_num, chap_str, title, cbz_file)


def _parse_lenient(cbz_file, extra_verbose):
    """Parse loose "v##" and "c###" markers; return None unless both are found."""
    base_filename = os.path.basename(cbz_file)
    if extra_verbose:
        print(f"Attempting alternative parsing for problematic file: {base_filename}")

    vol_match = _LENIENT_VOLUME_RE.search(base_filename)
    chap_match = _LENIENT_CHAPTER_RE.search(base_filename)
    if not (vol_match and chap_match):
        return None

    # The manga name is everything before the first "v##" marker.
    manga_name = base_filename[:vol_match.start()].strip()
    vol_num = int(vol_match.group(1))
    chap_str = chap_match.group(1)

    if extra_verbose:
        print(f" Extracted: manga={manga_name}, vol={vol_num}, chap={chap_str}")

    return _build_info(manga_name, vol_num, chap_str, '', cbz_file)


def organize_by_volume(cbz_files, extra_verbose=False):
    """Group CBZ files by manga name and volume.

    Each file is parsed with ``parse_manga_filename`` first. When that yields
    nothing, two fallbacks are tried in order:

    1. a "Vol XX - YYY - Title.cbz" pattern, using the name of the directory
       that contains the first file as the manga name;
    2. for files flagged by ``has_problematic_characters`` only, a lenient
       search for "v##" and "c###" markers.

    Files that no strategy can parse are left out of the result and, when
    *extra_verbose* is set, listed in a warning.

    Args:
        cbz_files: Sequence of CBZ file paths. It is iterated more than once
            and indexed, so it must not be a one-shot iterator. Paths are
            assumed to be hashable (e.g. ``str``).
        extra_verbose: When true, print diagnostics about fallback parsing,
            problematic filenames and unparsed files.

    Returns:
        A nested ``defaultdict`` mapping lower-cased manga name -> volume ->
        list of info dicts, each list sorted by its ``'chapter'`` value.
        Dicts from the fallback parsers have the keys ``manga_name``,
        ``volume``, ``chapter``, ``chapter_str``, ``title``, ``group`` and
        ``filename``; dicts from ``parse_manga_filename`` must provide at
        least ``manga_name``, ``volume`` and ``chapter``.
    """
    volumes = defaultdict(lambda: defaultdict(list))
    unparsed_files = []

    # Classify every file once up front; the result drives both the warning
    # and the lenient fallback below, so the check is not repeated per file.
    # NOTE(review): assumes has_problematic_characters is a pure check.
    problematic_files = [
        cbz_file for cbz_file in cbz_files
        if has_problematic_characters(cbz_file)[0]
    ]
    problematic_lookup = set(problematic_files)

    if problematic_files and extra_verbose:
        print(f"\nWARNING: Found {len(problematic_files)} problematic filenames that might need special handling.")

    if cbz_files:
        # The directory name is empty for a bare filename ("a.cbz") or a file
        # directly under the root; fall back rather than grouping files under
        # an empty manga title.
        default_manga_name = (
            os.path.basename(os.path.dirname(cbz_files[0]))
            or _UNKNOWN_MANGA_NAME
        )
        if extra_verbose:
            print(f"Using directory name as manga title: {default_manga_name}")
    else:
        default_manga_name = _UNKNOWN_MANGA_NAME

    for cbz_file in cbz_files:
        info = parse_manga_filename(cbz_file)
        if not info:
            info = _parse_vol_dash_chapter(
                cbz_file, default_manga_name, extra_verbose
            )
        if not info and cbz_file in problematic_lookup:
            info = _parse_lenient(cbz_file, extra_verbose)

        if not info:
            unparsed_files.append(cbz_file)
            continue

        # Lower-casing the key merges titles that differ only in case.
        volumes[info['manga_name'].lower()][info['volume']].append(info)

    # Sort chapters within each volume.
    for manga_volumes in volumes.values():
        for chapters in manga_volumes.values():
            chapters.sort(key=lambda entry: entry['chapter'])

    if extra_verbose and unparsed_files:
        print(f"\nWARNING: Could not parse {len(unparsed_files)} files:")
        # Show only the first few names to avoid flooding the output.
        for path in unparsed_files[:_MAX_UNPARSED_SHOWN]:
            print(f" - {os.path.basename(path)}")
        if len(unparsed_files) > _MAX_UNPARSED_SHOWN:
            print(f" ... and {len(unparsed_files) - _MAX_UNPARSED_SHOWN} more")

    return volumes