v0.2.2 resolve emoji issue in file names

2025-03-22 00:37:10 +00:00
parent 9fa11aea72
commit fab2207eb8
5 changed files with 214 additions and 91 deletions
--- a/cbz_volume_combiner/core.py
+++ b/cbz_volume_combiner/core.py
@@ -2,12 +2,23 @@
 import os
 from collections import defaultdict
 from .parsing import parse_manga_filename
+from .file_utils import has_problematic_characters, find_file_by_volume_chapter

 def organize_by_volume(cbz_files, extra_verbose=False):
    """Group CBZ files by manga name and volume."""
    volumes = defaultdict(lambda: defaultdict(list))
    unparsed_files = []
    
+    # First, identify any files with problematic characters
+    problematic_files = []
+    for cbz_file in cbz_files:
+        has_problem, _ = has_problematic_characters(cbz_file)
+        if has_problem:
+            problematic_files.append(cbz_file)
+            
+    if problematic_files and extra_verbose:
+        print(f"\nWARNING: Found {len(problematic_files)} problematic filenames that might need special handling.")
+    
    for cbz_file in cbz_files:
        info = parse_manga_filename(cbz_file)
        if info:
@@ -15,6 +26,51 @@ def organize_by_volume(cbz_files, extra_verbose=False):
            volumes[manga_key][info['volume']].append(info)
        else:
            unparsed_files.append(cbz_file)
+            
+            # For unparsed files that have problematic characters,
+            # try to get volume and chapter from filename pattern directly
+            has_problem, _ = has_problematic_characters(cbz_file)
+            if has_problem:
+                if extra_verbose:
+                    print(f"Attempting alternative parsing for problematic file: {os.path.basename(cbz_file)}")
+                
+                # Extract basic info using more lenient pattern
+                base_filename = os.path.basename(cbz_file)
+                # Look for v## and c### patterns
+                vol_match = re.search(r'v(\d+)', base_filename)
+                chap_match = re.search(r'c(\d+(?:\.\d+)?)', base_filename)
+                
+                if vol_match and chap_match:
+                    # Extract manga name (everything before v##)
+                    vol_pos = base_filename.find(f"v{vol_match.group(1)}")
+                    manga_name = base_filename[:vol_pos].strip()
+                    
+                    # Create a basic info dict
+                    vol_num = int(vol_match.group(1))
+                    chap_str = chap_match.group(1)
+                    
+                    try:
+                        chap_num = float(chap_str)
+                    except ValueError:
+                        chap_num = 0
+                    
+                    if extra_verbose:
+                        print(f"  Extracted: manga={manga_name}, vol={vol_num}, chap={chap_str}")
+                    
+                    info = {
+                        'manga_name': manga_name,
+                        'volume': vol_num,
+                        'chapter': chap_num,
+                        'chapter_str': chap_str,
+                        'title': '',
+                        'group': '',
+                        'filename': cbz_file
+                    }
+                    
+                    manga_key = manga_name.lower()
+                    volumes[manga_key][vol_num].append(info)
+                    # Remove from unparsed files since we handled it
+                    unparsed_files.remove(cbz_file)
    
    # Sort chapters within each volume
    for manga in volumes: