Init CBZ Volume Combiner v0.2.0
This commit is contained in:
71
cbz_volume_combiner/parsing.py
Normal file
71
cbz_volume_combiner/parsing.py
Normal file
@@ -0,0 +1,71 @@
|
||||
# File: /home/code/projects/manga-organizer-1/cbz-volume-combiner/cbz_volume_combiner/parsing.py
|
||||
import os
|
||||
import re
|
||||
|
||||
def normalize_filename(filename):
    """Normalize a filename to handle encoding issues and special characters.

    Typographic punctuation (and the placeholders left behind when such
    punctuation was re-encoded lossily) is mapped to its plain ASCII
    equivalent so that later pattern matching only has to deal with
    ``'``, ``"``, ``-`` and a regular space.

    Args:
        filename: The file name (or any string) to normalize.

    Returns:
        A copy of ``filename`` with the problematic characters replaced.
    """
    # Keys are written as explicit escapes so the table survives any
    # re-encoding of this source file. A dict literal with several identical
    # '?' keys silently keeps only the last entry, which is why each
    # character is spelled out unambiguously here.
    replacements = {
        '?': "'",       # question mark left by a lossy re-encoding of an apostrophe
        '\ufffd': "'",  # U+FFFD replacement character (undecodable apostrophe)
        '\u2018': "'",  # left single quotation mark
        '\u2019': "'",  # right single quotation mark / typographic apostrophe
        '\u201c': '"',  # left double quotation mark
        '\u201d': '"',  # right double quotation mark
        '\u2013': '-',  # en dash
        '\u2014': '-',  # em dash
        '\u00a0': ' ',  # non-breaking space
    }

    # No replacement value is itself a key, so a single translate() pass is
    # equivalent to chaining str.replace() calls.
    return filename.translate(str.maketrans(replacements))
|
||||
|
||||
def parse_manga_filename(filename):
    """Extract volume, chapter and title information from a manga filename.

    The base name is expected to look like::

        <manga name> v<volume> c<chapter> [title] [[group]].cbz

    where both the title and the trailing ``[group]`` tag are optional.

    Args:
        filename: Path or bare name of the archive. Only the base name is
            parsed; the value is returned unchanged under ``'filename'``.

    Returns:
        A dict with the keys ``manga_name``, ``volume`` (int), ``chapter``
        (float, or ``0`` if the chapter text cannot be converted),
        ``chapter_str`` (the raw chapter text), ``title``, ``group`` and
        ``filename``; or ``None`` when the name does not match the pattern.
        A warning is printed for unparseable names only when the file
        actually exists on disk.
    """
    base_filename = os.path.basename(filename)

    # Normalize first so typographic quotes/dashes do not defeat the pattern.
    normalized_filename = normalize_filename(base_filename)

    # The title/group tail is optional: "Name v01 c001.cbz" must parse too.
    pattern = r'(.*?)\s+v(\d+)\s+c(\d+[.\d-]*)(?:\s+(.*))?\.cbz$'
    match = re.match(pattern, normalized_filename)

    if match is None:
        if os.path.exists(filename):
            # For debugging: print the filename that couldn't be parsed
            print(f"WARNING: Could not parse filename: {base_filename}")
        return None

    manga_name = match.group(1).strip()
    volume = int(match.group(2))
    chapter_str = match.group(3)
    rest = (match.group(4) or "").strip()

    # Only the *last* bracketed tag is the group. Excluding brackets inside
    # the capture keeps the match anchored to that final tag, and slicing at
    # the match start stays correct even when the tag begins at index 0.
    group_match = re.search(r'\[([^\[\]]*)\]$', rest)
    if group_match:
        group = group_match.group(1)
        title = rest[:group_match.start()].strip()
    else:
        group = ""
        title = rest

    # Handle chapter numbers like "005.5" or "005-006": try the whole text,
    # then the first number of a range, and fall back to 0 otherwise.
    chapter = 0
    for candidate in (chapter_str, chapter_str.split('-')[0]):
        try:
            chapter = float(candidate)
        except ValueError:
            continue
        break

    return {
        'manga_name': manga_name,
        'volume': volume,
        'chapter': chapter,
        'chapter_str': chapter_str,
        'title': title,
        'group': group,
        'filename': filename,
    }
|
||||
Reference in New Issue
Block a user