# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/parsing.py
import os
import re

# Translation table from typographic / mis-encoded punctuation to plain ASCII.
# Every source character is spelled as an explicit escape so the table cannot
# silently collapse into duplicate keys if this file is ever re-encoded.
_FILENAME_CHAR_FIXES = str.maketrans({
    '?': "'",        # question mark left behind by a lossy apostrophe re-encoding
    '\u2018': "'",   # left single quotation mark
    '\u2019': "'",   # right single quotation mark (typographic apostrophe)
    '\u201c': '"',   # left double quotation mark
    '\u201d': '"',   # right double quotation mark
    '\u2013': '-',   # en dash
    '\u2014': '-',   # em dash
    '\u00a0': ' ',   # no-break space
})


def normalize_filename(filename):
    """Normalize a filename to handle encoding issues and special characters.

    Replaces typographic quotes, dashes and the no-break space with their
    ASCII equivalents, and turns a literal ``?`` into an apostrophe.

    Args:
        filename: The filename (or any string) to normalize.

    Returns:
        A new string with the problematic characters replaced. Characters
        that are not in the translation table are returned unchanged.
    """
    # One translate() pass replaces all characters at once, so the result
    # does not depend on the order of the individual replacements.
    return filename.translate(_FILENAME_CHAR_FIXES)

def parse_manga_filename(filename):
    """Extract volume, chapter, title and group information from a manga filename.

    Only the base name of ``filename`` is parsed, after it has been passed
    through ``normalize_filename``. The expected layout is::

        <manga name> v<volume> c<chapter> [<title>] [[<group>]].cbz

    where the title and the trailing ``[group]`` are both optional.

    Args:
        filename: Path or bare name of a ``.cbz`` file.

    Returns:
        A dict with the keys ``manga_name`` (str), ``volume`` (int),
        ``chapter`` (float; the first number of a range such as ``005-006``,
        or ``0`` when the chapter text cannot be converted), ``chapter_str``
        (the chapter text as written), ``title`` (str, possibly empty),
        ``group`` (str, possibly empty) and ``filename`` (the argument,
        unchanged). Returns ``None`` when the name does not match the
        expected layout; in that case a warning is printed if the path
        exists on disk.
    """
    base_filename = os.path.basename(filename)

    # Normalize first so typographic quotes/dashes do not defeat the pattern.
    normalized_filename = normalize_filename(base_filename)

    # The "<whitespace><title/group>" tail is wrapped in an optional group so
    # that a bare "Name v01 c001.cbz" is accepted as well.
    pattern = r'(.*?)\s+v(\d+)\s+c(\d+[.\d-]*)(?:\s+(.*))?\.cbz$'
    match = re.match(pattern, normalized_filename)

    if match is None:
        if os.path.exists(filename):
            # For debugging: print the filename that couldn't be parsed
            print(f"WARNING: Could not parse filename: {base_filename}")
        return None

    manga_name = match.group(1).strip()
    volume = int(match.group(2))
    chapter_str = match.group(3)
    # group(4) is None when the optional title/group tail is absent.
    rest = match.group(4) or ""

    # A trailing "[...]" is the group. Brackets are excluded from the capture
    # so that only the last bracket pair is taken, and the title is cut at
    # the exact position where that pair starts (no off-by-one when the
    # bracket is not preceded by a space or starts the string).
    group_match = re.search(r'\[([^\[\]]*)\]$', rest)
    if group_match:
        group = group_match.group(1)
        title = rest[:group_match.start()].strip()
    else:
        group = ""
        title = rest.strip()

    # Handle chapter numbers like "005.5" or "005-006": the text before the
    # first dash is the chapter (for a plain number that is the whole text).
    try:
        chapter = float(chapter_str.split('-', 1)[0])
    except ValueError:
        chapter = 0  # Fallback for unparseable chapter numbers, e.g. "1.2.3"

    return {
        'manga_name': manga_name,
        'volume': volume,
        'chapter': chapter,
        'chapter_str': chapter_str,
        'title': title,
        'group': group,
        'filename': filename
    }