v0.2.2 resolve emoji issue in file names
This commit is contained in:
@@ -4,4 +4,4 @@ from .file_utils import find_cbz_files, fix_missing_files
|
|||||||
from .core import organize_by_volume
|
from .core import organize_by_volume
|
||||||
from .volume import create_volume_cbz
|
from .volume import create_volume_cbz
|
||||||
|
|
||||||
__version__ = "0.2.0"
|
__version__ = "0.2.2"
|
||||||
@@ -2,12 +2,23 @@
|
|||||||
import os
|
import os
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from .parsing import parse_manga_filename
|
from .parsing import parse_manga_filename
|
||||||
|
from .file_utils import has_problematic_characters, find_file_by_volume_chapter
|
||||||
|
|
||||||
def organize_by_volume(cbz_files, extra_verbose=False):
|
def organize_by_volume(cbz_files, extra_verbose=False):
|
||||||
"""Group CBZ files by manga name and volume."""
|
"""Group CBZ files by manga name and volume."""
|
||||||
volumes = defaultdict(lambda: defaultdict(list))
|
volumes = defaultdict(lambda: defaultdict(list))
|
||||||
unparsed_files = []
|
unparsed_files = []
|
||||||
|
|
||||||
|
# First, identify any files with problematic characters
|
||||||
|
problematic_files = []
|
||||||
|
for cbz_file in cbz_files:
|
||||||
|
has_problem, _ = has_problematic_characters(cbz_file)
|
||||||
|
if has_problem:
|
||||||
|
problematic_files.append(cbz_file)
|
||||||
|
|
||||||
|
if problematic_files and extra_verbose:
|
||||||
|
print(f"\nWARNING: Found {len(problematic_files)} problematic filenames that might need special handling.")
|
||||||
|
|
||||||
for cbz_file in cbz_files:
|
for cbz_file in cbz_files:
|
||||||
info = parse_manga_filename(cbz_file)
|
info = parse_manga_filename(cbz_file)
|
||||||
if info:
|
if info:
|
||||||
@@ -15,6 +26,51 @@ def organize_by_volume(cbz_files, extra_verbose=False):
|
|||||||
volumes[manga_key][info['volume']].append(info)
|
volumes[manga_key][info['volume']].append(info)
|
||||||
else:
|
else:
|
||||||
unparsed_files.append(cbz_file)
|
unparsed_files.append(cbz_file)
|
||||||
|
|
||||||
|
# For unparsed files that have problematic characters,
|
||||||
|
# try to get volume and chapter from filename pattern directly
|
||||||
|
has_problem, _ = has_problematic_characters(cbz_file)
|
||||||
|
if has_problem:
|
||||||
|
if extra_verbose:
|
||||||
|
print(f"Attempting alternative parsing for problematic file: {os.path.basename(cbz_file)}")
|
||||||
|
|
||||||
|
# Extract basic info using more lenient pattern
|
||||||
|
base_filename = os.path.basename(cbz_file)
|
||||||
|
# Look for v## and c### patterns
|
||||||
|
vol_match = re.search(r'v(\d+)', base_filename)
|
||||||
|
chap_match = re.search(r'c(\d+(?:\.\d+)?)', base_filename)
|
||||||
|
|
||||||
|
if vol_match and chap_match:
|
||||||
|
# Extract manga name (everything before v##)
|
||||||
|
vol_pos = base_filename.find(f"v{vol_match.group(1)}")
|
||||||
|
manga_name = base_filename[:vol_pos].strip()
|
||||||
|
|
||||||
|
# Create a basic info dict
|
||||||
|
vol_num = int(vol_match.group(1))
|
||||||
|
chap_str = chap_match.group(1)
|
||||||
|
|
||||||
|
try:
|
||||||
|
chap_num = float(chap_str)
|
||||||
|
except ValueError:
|
||||||
|
chap_num = 0
|
||||||
|
|
||||||
|
if extra_verbose:
|
||||||
|
print(f" Extracted: manga={manga_name}, vol={vol_num}, chap={chap_str}")
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'manga_name': manga_name,
|
||||||
|
'volume': vol_num,
|
||||||
|
'chapter': chap_num,
|
||||||
|
'chapter_str': chap_str,
|
||||||
|
'title': '',
|
||||||
|
'group': '',
|
||||||
|
'filename': cbz_file
|
||||||
|
}
|
||||||
|
|
||||||
|
manga_key = manga_name.lower()
|
||||||
|
volumes[manga_key][vol_num].append(info)
|
||||||
|
# Remove from unparsed files since we handled it
|
||||||
|
unparsed_files.remove(cbz_file)
|
||||||
|
|
||||||
# Sort chapters within each volume
|
# Sort chapters within each volume
|
||||||
for manga in volumes:
|
for manga in volumes:
|
||||||
|
|||||||
@@ -1,8 +1,23 @@
|
|||||||
# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/file_utils.py
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import unicodedata
|
||||||
from .parsing import parse_manga_filename
|
from .parsing import parse_manga_filename
|
||||||
|
|
||||||
|
def is_emoji(char):
    """Return True if ``char`` is an emoji or a similar pictographic symbol.

    A character is flagged when it is non-ASCII and either

    * its Unicode general category is ``So`` (other symbol) or ``Sm``
      (math symbol), or
    * its code point lies above U+1F000 and it is not a letter.

    ASCII characters are never flagged: operators such as ``+``, ``=`` and
    ``~`` are in category ``Sm`` but are ordinary filename characters.
    Letters above U+1F000 (for example CJK extension ideographs) are not
    flagged either, because they are text rather than symbols.

    Args:
        char: A single-character string.

    Returns:
        bool: True when the character should be treated as an emoji/symbol.

    Raises:
        TypeError: If ``char`` is not a string of length one (raised by
            ``ord``).
    """
    code_point = ord(char)

    # Plain ASCII is always safe, even the math operators in category Sm.
    if code_point < 0x80:
        return False

    category = unicodedata.category(char)
    if category in ('So', 'Sm'):  # Symbol categories
        return True

    # Above U+1F000 everything except letters is treated as an emoji/symbol.
    return code_point > 0x1F000 and not category.startswith('L')
|
||||||
|
|
||||||
|
def has_problematic_characters(filename):
    """Report whether the file's base name contains an emoji or a '?'.

    Only the last path component is inspected; directory names are ignored.

    Args:
        filename: Path (or bare name) of the file to inspect.

    Returns:
        tuple: ``(True, char)`` where ``char`` is the first offending
        character found, or ``(False, None)`` when the name is clean.
    """
    name = os.path.basename(filename)
    # First character that is_emoji() flags or that is a literal question mark.
    offender = next((ch for ch in name if is_emoji(ch) or ch == '?'), None)
    if offender is None:
        return False, None
    return True, offender
|
||||||
|
|
||||||
def find_cbz_files(folder_path, recursive=False, extra_verbose=False):
|
def find_cbz_files(folder_path, recursive=False, extra_verbose=False):
|
||||||
"""Find all CBZ files in the given folder."""
|
"""Find all CBZ files in the given folder."""
|
||||||
cbz_files = []
|
cbz_files = []
|
||||||
@@ -39,87 +54,118 @@ def fix_missing_files(chapter_infos, folder_path, extra_verbose=False):
|
|||||||
fixed_chapters.append(chapter_info)
|
fixed_chapters.append(chapter_info)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# If we're here, the file doesn't exist - get the problematic filename
|
# If file doesn't exist, look for it by volume and chapter
|
||||||
base_filename = os.path.basename(chapter_info['filename'])
|
|
||||||
directory = os.path.dirname(chapter_info['filename'])
|
directory = os.path.dirname(chapter_info['filename'])
|
||||||
|
vol_num = chapter_info['volume']
|
||||||
|
chap_num = chapter_info['chapter_str']
|
||||||
|
|
||||||
if extra_verbose:
|
if extra_verbose:
|
||||||
print(f"Trying to find replacement for: {base_filename}")
|
print(f"Looking for alternative for {os.path.basename(chapter_info['filename'])}")
|
||||||
|
|
||||||
# First approach: Direct check with normalized path
|
# Find file by volume and chapter numbers directly
|
||||||
# This is for cases where Python's path handling might be different from the filesystem
|
actual_file = find_file_by_volume_chapter(directory, vol_num, chap_num, extra_verbose)
|
||||||
found_replacement = False
|
|
||||||
|
|
||||||
# Look for similar files in the directory
|
if actual_file and os.path.exists(actual_file):
|
||||||
try:
|
|
||||||
# Get all CBZ files in the directory
|
|
||||||
cbz_files_in_dir = [f for f in os.listdir(directory) if f.lower().endswith('.cbz')]
|
|
||||||
|
|
||||||
# First try: Look for exact matches with volume and chapter numbers
|
|
||||||
vol_num = chapter_info['volume']
|
|
||||||
chap_num = chapter_info['chapter_str']
|
|
||||||
|
|
||||||
# Improved pattern matching for volume and chapter numbers
|
|
||||||
vol_pattern = f"v{vol_num:02d}" # e.g., "v07"
|
|
||||||
chap_pattern = f"c{chap_num}" # e.g., "c037"
|
|
||||||
|
|
||||||
# Try to find a direct match first
|
|
||||||
for file in cbz_files_in_dir:
|
|
||||||
# Check volume and chapter patterns
|
|
||||||
if vol_pattern in file and chap_pattern in file:
|
|
||||||
potential_path = os.path.join(directory, file)
|
|
||||||
if os.path.exists(potential_path):
|
|
||||||
if extra_verbose:
|
|
||||||
print(f" Found direct match: {file}")
|
|
||||||
new_chapter_info = dict(chapter_info)
|
|
||||||
new_chapter_info['filename'] = potential_path
|
|
||||||
fixed_chapters.append(new_chapter_info)
|
|
||||||
found_replacement = True
|
|
||||||
break
|
|
||||||
|
|
||||||
# If no direct match, try more flexible matching
|
|
||||||
if not found_replacement:
|
|
||||||
# Fuzzy match approach
|
|
||||||
chapter_pattern = rf"v0*{vol_num}\s+c0*{chap_num.lstrip('0')}"
|
|
||||||
|
|
||||||
for file in cbz_files_in_dir:
|
|
||||||
# Remove special characters for comparison
|
|
||||||
clean_file = file
|
|
||||||
for char in "'?,":
|
|
||||||
clean_file = clean_file.replace(char, '')
|
|
||||||
|
|
||||||
# Strip special characters from the pattern too
|
|
||||||
clean_pattern = chapter_pattern
|
|
||||||
for char in "'?,":
|
|
||||||
clean_pattern = clean_pattern.replace(char, '')
|
|
||||||
|
|
||||||
# Try matching with cleaned strings
|
|
||||||
if re.search(chapter_pattern, file, re.IGNORECASE) or re.search(clean_pattern, clean_file, re.IGNORECASE):
|
|
||||||
potential_path = os.path.join(directory, file)
|
|
||||||
if os.path.exists(potential_path):
|
|
||||||
if extra_verbose:
|
|
||||||
print(f" Found fuzzy match: {file}")
|
|
||||||
new_chapter_info = dict(chapter_info)
|
|
||||||
new_chapter_info['filename'] = potential_path
|
|
||||||
fixed_chapters.append(new_chapter_info)
|
|
||||||
found_replacement = True
|
|
||||||
break
|
|
||||||
|
|
||||||
# Last resort: List all files and let the user see what's available
|
|
||||||
if not found_replacement and extra_verbose:
|
|
||||||
print(" No match found. Available files in directory:")
|
|
||||||
for idx, file in enumerate(sorted(cbz_files_in_dir)):
|
|
||||||
if idx < 20: # Limit to first 20 files to avoid spam
|
|
||||||
print(f" - {file}")
|
|
||||||
else:
|
|
||||||
print(f" ... and {len(cbz_files_in_dir) - 20} more files")
|
|
||||||
break
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
if extra_verbose:
|
if extra_verbose:
|
||||||
print(f" Error while searching for replacement: {str(e)}")
|
print(f"Found alternative file: {os.path.basename(actual_file)}")
|
||||||
|
|
||||||
if not found_replacement:
|
new_chapter_info = dict(chapter_info)
|
||||||
|
new_chapter_info['filename'] = actual_file
|
||||||
|
fixed_chapters.append(new_chapter_info)
|
||||||
|
else:
|
||||||
|
if extra_verbose:
|
||||||
|
print(f"No alternative found for volume {vol_num}, chapter {chap_num}")
|
||||||
unresolved_chapters.append(chapter_info)
|
unresolved_chapters.append(chapter_info)
|
||||||
|
|
||||||
return fixed_chapters, unresolved_chapters
|
return fixed_chapters, unresolved_chapters
|
||||||
|
|
||||||
|
def find_file_by_volume_chapter(directory, volume_num, chapter_num, extra_verbose=False):
    """Find a ``.cbz`` file in ``directory`` by its volume and chapter number.

    The rest of the file name (title, group, emoji, ...) is ignored; only the
    ``v<volume>`` and ``c<chapter>`` tokens are compared. Leading zeros are
    ignored on both sides and matching is case-insensitive. Numbers are
    matched as whole numbers, so volume 1 does not match ``v10`` and chapter
    ``10`` does not match ``c100`` or ``c010.5``.

    Two passes are made over the alphabetically sorted file list:

    1. exact: the ``v``/``c`` marker must not directly follow a letter or
       digit (e.g. ``Title v07 c037.cbz``, ``Title_v07_c037.cbz``);
    2. regex: the marker may appear anywhere and may be separated from its
       number by non-alphanumeric characters (e.g. ``Titlev07c037.cbz``,
       ``Title v-07 c 037.cbz``).

    Args:
        directory: Directory to search. An empty string means the current
            directory (this is what ``os.path.dirname`` yields for a bare
            file name).
        volume_num: Volume number; anything ``int()`` accepts.
        chapter_num: Chapter number as text, e.g. ``"037"`` or ``"10.5"``.
            Non-string values are converted with ``str()``.
        extra_verbose: When true, print progress and diagnostics.

    Returns:
        str or None: ``os.path.join(directory, name)`` of the first matching
        file, or ``None`` when the directory is missing or unreadable, the
        numbers are unusable, or nothing matches.
    """
    # dirname() of a bare file name is '', which means "current directory".
    search_dir = directory or os.curdir
    if not os.path.isdir(search_dir):
        if extra_verbose:
            print(f"Directory does not exist: {directory}")
        return None

    if extra_verbose:
        print(f"Looking for volume {volume_num}, chapter {chapter_num} in {directory}")

    try:
        volume = int(volume_num)
    except (TypeError, ValueError):
        if extra_verbose:
            print(f"Error searching for file: invalid volume number {volume_num!r}")
        return None

    chapter_text = str(chapter_num).strip()
    if not chapter_text:
        if extra_verbose:
            print("Error searching for file: empty chapter number")
        return None

    # Drop leading zeros from the integer part only: "037" -> "37",
    # "010.5" -> "10.5", "000" -> "0". The patterns re-allow them via "0*".
    whole, dot, fraction = chapter_text.partition('.')
    chapter = f"{whole.lstrip('0') or '0'}{dot}{fraction}"

    # Number tails. The look-aheads stop "1" from matching "10" and stop
    # chapter "10" from matching "10.5". re.escape keeps the "." literal.
    vol_tail = rf'0*{volume}(?!\d)'
    chap_tail = rf'0*{re.escape(chapter)}(?!\.?\d)'
    # Marker must not directly follow a letter or digit ("[^\W_]" is any
    # word character except the underscore).
    boundary = r'(?<![^\W_])'
    # Optional non-alphanumeric separators between the marker and its number.
    gap = r'[\W_]*'

    # Patterns are loop-invariant, so compile them once up front.
    tiers = (
        ("exact",
         re.compile(boundary + 'v' + vol_tail, re.IGNORECASE),
         re.compile(boundary + 'c' + chap_tail, re.IGNORECASE)),
        ("regex",
         re.compile('v' + gap + vol_tail, re.IGNORECASE),
         re.compile('c' + gap + chap_tail, re.IGNORECASE)),
    )

    try:
        # Sorted so the result does not depend on filesystem listing order.
        files = sorted(f for f in os.listdir(search_dir) if f.lower().endswith('.cbz'))
    except OSError as e:
        # Best effort: an unreadable directory is reported, not raised.
        if extra_verbose:
            print(f"Error searching for file: {e}")
        return None

    for label, vol_re, chap_re in tiers:
        for file in files:
            if vol_re.search(file) and chap_re.search(file):
                if extra_verbose:
                    print(f"Found {label} match: {file}")
                return os.path.join(directory, file)

    if extra_verbose:
        print(f"No match found for volume {volume_num}, chapter {chapter_num}")
        print("Available files:")
        for file in files[:10]:
            print(f"  - {file}")
        if len(files) > 10:
            print(f"  ... and {len(files) - 10} more")

    return None
|
||||||
|
|
||||||
|
def create_clean_filename_mapping(cbz_files, extra_verbose=False):
    """Create a mapping of problematic filenames to clean alternatives.

    For every path whose base name contains an emoji or a '?', a sanitised
    name is built in the same directory by replacing each such character
    with '_'. If that name is already taken, a numeric suffix (``_1``,
    ``_2``, ...) is appended before the extension until it is free. A name
    counts as taken when it exists on disk or was already handed out to
    another file during this call. No file is renamed here; only the
    mapping is returned.

    Args:
        cbz_files: Iterable of file paths to inspect.
        extra_verbose: When true, print each mapping that is created.

    Returns:
        dict: Maps each problematic path to its proposed clean path. Paths
        without problematic characters are not included.
    """
    filename_mapping = {}
    # Targets already assigned in this call. Without this, two different
    # problematic names that sanitise to the same string would collide.
    reserved_targets = set()

    for file_path in cbz_files:
        if file_path in filename_mapping:
            # Same input listed twice: keep the target chosen the first time.
            continue

        has_problem, _ = has_problematic_characters(file_path)
        if not has_problem:
            continue

        # Get the directory and filename
        directory, filename = os.path.split(file_path)

        # Create a clean version of the filename by removing problematic characters
        clean_filename = ''.join(
            '_' if (is_emoji(c) or c == '?') else c for c in filename
        )

        # Make sure we don't create duplicates, on disk or within this mapping
        base, ext = os.path.splitext(clean_filename)
        target = os.path.join(directory, clean_filename)
        counter = 1
        while target in reserved_targets or os.path.exists(target):
            clean_filename = f"{base}_{counter}{ext}"
            target = os.path.join(directory, clean_filename)
            counter += 1

        # Add to mapping
        reserved_targets.add(target)
        filename_mapping[file_path] = target

        if extra_verbose:
            print(f"Mapped: {filename} -> {clean_filename}")

    return filename_mapping
|
||||||
@@ -4,7 +4,7 @@ import zipfile
|
|||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from .file_utils import fix_missing_files
|
from .file_utils import fix_missing_files, find_file_by_volume_chapter, has_problematic_characters
|
||||||
|
|
||||||
def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, force=False, verbose=False, extra_verbose=False):
|
def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, force=False, verbose=False, extra_verbose=False):
|
||||||
"""Combine multiple chapter CBZ files into a single volume CBZ."""
|
"""Combine multiple chapter CBZ files into a single volume CBZ."""
|
||||||
@@ -88,11 +88,36 @@ def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, fo
|
|||||||
if verbose:
|
if verbose:
|
||||||
print(f"Extracting chapter {chapter_info['chapter_str']}")
|
print(f"Extracting chapter {chapter_info['chapter_str']}")
|
||||||
|
|
||||||
|
# IMPORTANT: Handle file matching right before extraction
|
||||||
|
file_exists = os.path.exists(chapter_info['filename'])
|
||||||
|
|
||||||
|
if not file_exists:
|
||||||
|
# Try to find the file by volume and chapter
|
||||||
|
directory = os.path.dirname(chapter_info['filename'])
|
||||||
|
vol_num = chapter_info['volume']
|
||||||
|
chap_num = chapter_info['chapter_str']
|
||||||
|
|
||||||
|
if extra_verbose:
|
||||||
|
print(f"File not found: {chapter_info['filename']}")
|
||||||
|
print(f"Looking for alternative file with volume {vol_num}, chapter {chap_num}")
|
||||||
|
|
||||||
|
# Find by volume and chapter numbers only
|
||||||
|
actual_file = find_file_by_volume_chapter(directory, vol_num, chap_num, extra_verbose)
|
||||||
|
|
||||||
|
if actual_file and os.path.exists(actual_file):
|
||||||
|
chapter_info['filename'] = actual_file
|
||||||
|
file_exists = True
|
||||||
|
if extra_verbose:
|
||||||
|
print(f"Found alternative file: {actual_file}")
|
||||||
|
|
||||||
|
# Skip if file still doesn't exist
|
||||||
|
if not file_exists:
|
||||||
|
if extra_verbose:
|
||||||
|
print(f"Skipping chapter {chapter_info['chapter_str']} - file not found")
|
||||||
|
continue
|
||||||
|
|
||||||
if extra_verbose:
|
if extra_verbose:
|
||||||
print(f"File: {chapter_info['filename']}")
|
print(f"File: {chapter_info['filename']}")
|
||||||
if not os.path.exists(chapter_info['filename']):
|
|
||||||
print(f" ERROR: File does not exist!")
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Extract the chapter
|
# Extract the chapter
|
||||||
@@ -100,16 +125,13 @@ def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, fo
|
|||||||
file_list = sorted(zf.namelist())
|
file_list = sorted(zf.namelist())
|
||||||
|
|
||||||
if extra_verbose:
|
if extra_verbose:
|
||||||
print(f" Contains {len(file_list)} files:")
|
print(f" Contains {len(file_list)} files")
|
||||||
|
|
||||||
for i, file_name in enumerate(file_list):
|
for i, file_name in enumerate(file_list):
|
||||||
if file_name.endswith('/'): # Skip directories
|
if file_name.endswith('/'): # Skip directories
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if extra_verbose and i < 10: # Show first 10 files only
|
# Extract with a standardized naming pattern
|
||||||
print(f" - {file_name}")
|
|
||||||
|
|
||||||
# Extract with a standardized naming pattern: chapterXXX_pageYYY.ext
|
|
||||||
base, ext = os.path.splitext(os.path.basename(file_name))
|
base, ext = os.path.splitext(os.path.basename(file_name))
|
||||||
new_name = f"chapter{chapter_info['chapter_str'].zfill(3)}_{i+1:03d}{ext}"
|
new_name = f"chapter{chapter_info['chapter_str'].zfill(3)}_{i+1:03d}{ext}"
|
||||||
|
|
||||||
@@ -120,11 +142,10 @@ def create_volume_cbz(manga_name, volume_num, chapter_infos, output_dir=None, fo
|
|||||||
except Exception as file_error:
|
except Exception as file_error:
|
||||||
if extra_verbose:
|
if extra_verbose:
|
||||||
print(f" ERROR extracting {file_name}: {str(file_error)}")
|
print(f" ERROR extracting {file_name}: {str(file_error)}")
|
||||||
|
|
||||||
if extra_verbose and len(file_list) > 10:
|
|
||||||
print(f" ... and {len(file_list) - 10} more files")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return False, f"Error extracting chapter {chapter_info['chapter_str']}: {str(e)}"
|
if extra_verbose:
|
||||||
|
print(f"Error extracting chapter: {e}")
|
||||||
|
continue # Skip this chapter but continue with others
|
||||||
|
|
||||||
# Create the volume CBZ
|
# Create the volume CBZ
|
||||||
if verbose:
|
if verbose:
|
||||||
|
|||||||
2
setup.py
2
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="cbz-volume-combiner",
|
name="cbz-volume-combiner",
|
||||||
version="0.2.0",
|
version="0.2.2",
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
scripts=['bin/cbz-volume-combiner'],
|
scripts=['bin/cbz-volume-combiner'],
|
||||||
install_requires=[
|
install_requires=[
|
||||||
|
|||||||
Reference in New Issue
Block a user