v0.2.2: resolve emoji issue in file names

This commit is contained in:
Ben
2025-03-22 00:37:10 +00:00
parent 9fa11aea72
commit fab2207eb8
5 changed files with 214 additions and 91 deletions

View File

@@ -2,12 +2,23 @@
import os
from collections import defaultdict
from .parsing import parse_manga_filename
from .file_utils import has_problematic_characters, find_file_by_volume_chapter
def organize_by_volume(cbz_files, extra_verbose=False):
"""Group CBZ files by manga name and volume."""
volumes = defaultdict(lambda: defaultdict(list))
unparsed_files = []
# First, identify any files with problematic characters
problematic_files = []
for cbz_file in cbz_files:
has_problem, _ = has_problematic_characters(cbz_file)
if has_problem:
problematic_files.append(cbz_file)
if problematic_files and extra_verbose:
print(f"\nWARNING: Found {len(problematic_files)} problematic filenames that might need special handling.")
for cbz_file in cbz_files:
info = parse_manga_filename(cbz_file)
if info:
@@ -15,6 +26,51 @@ def organize_by_volume(cbz_files, extra_verbose=False):
volumes[manga_key][info['volume']].append(info)
else:
unparsed_files.append(cbz_file)
# For unparsed files that have problematic characters,
# try to get volume and chapter from filename pattern directly
has_problem, _ = has_problematic_characters(cbz_file)
if has_problem:
if extra_verbose:
print(f"Attempting alternative parsing for problematic file: {os.path.basename(cbz_file)}")
# Extract basic info using more lenient pattern
base_filename = os.path.basename(cbz_file)
# Look for v## and c### patterns
vol_match = re.search(r'v(\d+)', base_filename)
chap_match = re.search(r'c(\d+(?:\.\d+)?)', base_filename)
if vol_match and chap_match:
# Extract manga name (everything before v##)
vol_pos = base_filename.find(f"v{vol_match.group(1)}")
manga_name = base_filename[:vol_pos].strip()
# Create a basic info dict
vol_num = int(vol_match.group(1))
chap_str = chap_match.group(1)
try:
chap_num = float(chap_str)
except ValueError:
chap_num = 0
if extra_verbose:
print(f" Extracted: manga={manga_name}, vol={vol_num}, chap={chap_str}")
info = {
'manga_name': manga_name,
'volume': vol_num,
'chapter': chap_num,
'chapter_str': chap_str,
'title': '',
'group': '',
'filename': cbz_file
}
manga_key = manga_name.lower()
volumes[manga_key][vol_num].append(info)
# Remove from unparsed files since we handled it
unparsed_files.remove(cbz_file)
# Sort chapters within each volume
for manga in volumes: