Initial commit
This commit is contained in:
275
label/cores/cores.py
Normal file
275
label/cores/cores.py
Normal file
@@ -0,0 +1,275 @@
|
||||
import os
|
||||
import shutil
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from pdf2image import convert_from_path
|
||||
|
||||
def clear_output_folder(folder):
|
||||
"""Clear and create output folder."""
|
||||
if os.path.exists(folder):
|
||||
shutil.rmtree(folder)
|
||||
os.makedirs(folder)
|
||||
|
||||
def load_image(file_path, dpi=300):
|
||||
"""Load image from PDF or image file."""
|
||||
file_ext = os.path.splitext(file_path)[1].lower()
|
||||
|
||||
if file_ext == '.pdf':
|
||||
print(f"Converting PDF to image (DPI: {dpi})...")
|
||||
images = convert_from_path(file_path, dpi=dpi, fmt='png')
|
||||
img = images[0] # First page only
|
||||
print(f" Converted: {img.size[0]}x{img.size[1]}")
|
||||
else:
|
||||
print(f"Loading image...")
|
||||
img = Image.open(file_path)
|
||||
print(f" Loaded: {img.size[0]}x{img.size[1]}")
|
||||
|
||||
# Convert to RGB
|
||||
if img.mode != 'RGB':
|
||||
img = img.convert('RGB')
|
||||
|
||||
return img
|
||||
|
||||
def find_main_colors(img, color_threshold=30, min_percentage=0.5):
|
||||
"""
|
||||
Find main distinct colors in image.
|
||||
|
||||
Algorithm:
|
||||
1. Find most common color
|
||||
2. Group all colors within distance threshold (Euclidean distance in RGB space)
|
||||
3. Remove those colors
|
||||
4. Find next most common color
|
||||
5. Repeat until no colors left
|
||||
|
||||
Parameters:
|
||||
- color_threshold: Maximum distance between colors to group them (0-441)
|
||||
Distance = sqrt((R1-R2)² + (G1-G2)² + (B1-B2)²)
|
||||
- min_percentage: Minimum percentage to be a "main" color
|
||||
"""
|
||||
print(f"\nAnalyzing colors...")
|
||||
print(f" Color distance threshold: {color_threshold}")
|
||||
print(f" Minimum percentage: {min_percentage}%")
|
||||
|
||||
# Get all pixels
|
||||
pixels = np.array(img)
|
||||
h, w = pixels.shape[:2]
|
||||
pixels = pixels.reshape(-1, 3)
|
||||
total_pixels = len(pixels)
|
||||
|
||||
# Remove white background (>= 250 in all RGB channels)
|
||||
is_white = (pixels[:, 0] >= 250) & (pixels[:, 1] >= 250) & (pixels[:, 2] >= 250)
|
||||
pixels = pixels[~is_white]
|
||||
|
||||
print(f" Total pixels: {total_pixels:,}")
|
||||
print(f" White background: {np.sum(is_white):,} ({np.sum(is_white)/total_pixels*100:.1f}%) - IGNORED")
|
||||
print(f" Color pixels: {len(pixels):,} ({len(pixels)/total_pixels*100:.1f}%)")
|
||||
|
||||
if len(pixels) == 0:
|
||||
print(" Error: Image is entirely white!")
|
||||
return []
|
||||
|
||||
# Get unique colors and their counts
|
||||
unique_colors, counts = np.unique(pixels, axis=0, return_counts=True)
|
||||
print(f" Unique colors found: {len(unique_colors):,}")
|
||||
|
||||
# Greedy grouping by frequency
|
||||
print(f"\n Grouping colors (greedy by frequency)...")
|
||||
|
||||
color_groups = []
|
||||
remaining = np.ones(len(unique_colors), dtype=bool) # Track which colors are still available
|
||||
|
||||
iteration = 0
|
||||
while np.any(remaining):
|
||||
iteration += 1
|
||||
|
||||
# Find most common remaining color
|
||||
remaining_counts = counts.copy()
|
||||
remaining_counts[~remaining] = 0 # Zero out already-used colors
|
||||
|
||||
if np.max(remaining_counts) == 0:
|
||||
break
|
||||
|
||||
most_common_idx = np.argmax(remaining_counts)
|
||||
base_color = unique_colors[most_common_idx]
|
||||
|
||||
# Calculate Euclidean distance from base_color to all colors
|
||||
# Distance = sqrt((R1-R2)² + (G1-G2)² + (B1-B2)²)
|
||||
diff = unique_colors.astype(float) - base_color.astype(float)
|
||||
distances = np.sqrt(np.sum(diff ** 2, axis=1))
|
||||
|
||||
# Find all colors within threshold distance
|
||||
within_threshold = (distances <= color_threshold) & remaining
|
||||
|
||||
# Mark these colors as used
|
||||
remaining[within_threshold] = False
|
||||
|
||||
# Group info
|
||||
group_colors = unique_colors[within_threshold]
|
||||
group_counts = counts[within_threshold]
|
||||
total_count = np.sum(group_counts)
|
||||
percentage = (total_count / len(pixels)) * 100
|
||||
|
||||
color_groups.append({
|
||||
'color': base_color,
|
||||
'count': total_count,
|
||||
'percentage': percentage,
|
||||
'num_variants': len(group_colors)
|
||||
})
|
||||
|
||||
print(f" Group {iteration}: RGB{tuple(base_color)} -> {len(group_colors)} variants, {percentage:.1f}%")
|
||||
|
||||
print(f" Created {len(color_groups)} color groups")
|
||||
|
||||
# Filter by minimum percentage
|
||||
color_groups = [g for g in color_groups if g['percentage'] >= min_percentage]
|
||||
|
||||
print(f" Main colors (>= {min_percentage}%): {len(color_groups)}")
|
||||
|
||||
# Verify percentages
|
||||
total_percentage = sum(g['percentage'] for g in color_groups)
|
||||
print(f" Total percentage: {total_percentage:.1f}%")
|
||||
|
||||
# Display results
|
||||
print(f"\n{'='*60}")
|
||||
print(f"MAIN COLORS:")
|
||||
print(f"{'='*60}")
|
||||
for i, group in enumerate(color_groups, 1):
|
||||
r, g, b = group['color']
|
||||
print(f"{i}. RGB({r:3d}, {g:3d}, {b:3d}) - {group['percentage']:5.1f}% ({group['count']:,} pixels, {group['num_variants']} variants)")
|
||||
|
||||
return color_groups
|
||||
|
||||
def create_color_layers(img, color_groups, color_threshold, output_folder='output'):
|
||||
"""Create one image per color group showing only that color."""
|
||||
print(f"\nCreating color layers...")
|
||||
|
||||
# Get all pixels
|
||||
pixels = np.array(img)
|
||||
h, w = pixels.shape[:2]
|
||||
original_pixels = pixels.reshape(-1, 3)
|
||||
|
||||
# Remove white background for grouping
|
||||
is_white = (original_pixels[:, 0] >= 250) & (original_pixels[:, 1] >= 250) & (original_pixels[:, 2] >= 250)
|
||||
|
||||
# Get unique colors for matching
|
||||
unique_colors, inverse = np.unique(original_pixels[~is_white], axis=0, return_inverse=True)
|
||||
|
||||
# For each color group, create a layer
|
||||
for i, group in enumerate(color_groups, 1):
|
||||
base_color = group['color']
|
||||
|
||||
# Calculate distances from base_color to all unique colors
|
||||
diff = unique_colors.astype(float) - base_color.astype(float)
|
||||
distances = np.sqrt(np.sum(diff ** 2, axis=1))
|
||||
|
||||
# Find which unique colors belong to this group
|
||||
in_group = distances <= color_threshold
|
||||
|
||||
# Create mask for pixels in this group
|
||||
pixel_mask = np.zeros(len(original_pixels), dtype=bool)
|
||||
pixel_mask[~is_white] = in_group[inverse]
|
||||
|
||||
# Create layer image (white background)
|
||||
layer = np.full((h, w, 3), 255, dtype=np.uint8)
|
||||
layer_flat = layer.reshape(-1, 3)
|
||||
|
||||
# Set pixels for this color group
|
||||
layer_flat[pixel_mask] = original_pixels[pixel_mask]
|
||||
|
||||
# Save layer
|
||||
r, g, b = base_color
|
||||
filename = f'layer_{i}_rgb{r}_{g}_{b}.png'
|
||||
filepath = os.path.join(output_folder, filename)
|
||||
Image.fromarray(layer).save(filepath)
|
||||
|
||||
pixel_count = np.sum(pixel_mask)
|
||||
print(f" Layer {i}: {filename} ({pixel_count:,} pixels)")
|
||||
|
||||
def save_results(color_groups, output_folder='output'):
|
||||
"""Save color palette to file."""
|
||||
output_path = os.path.join(output_folder, 'main_colors.txt')
|
||||
|
||||
with open(output_path, 'w') as f:
|
||||
f.write("MAIN COLORS (by frequency)\n")
|
||||
f.write("="*60 + "\n")
|
||||
f.write("Note: White background ignored\n")
|
||||
f.write(" Similar colors grouped together\n\n")
|
||||
|
||||
for i, group in enumerate(color_groups, 1):
|
||||
r, g, b = group['color']
|
||||
f.write(f"{i}. RGB({r}, {g}, {b})\n")
|
||||
f.write(f" {group['percentage']:.2f}% ({group['count']:,} pixels)\n")
|
||||
f.write(f" {group['num_variants']} color variants\n")
|
||||
f.write(f" Hex: #{r:02X}{g:02X}{b:02X}\n\n")
|
||||
|
||||
print(f"\nResults saved to: {output_path}")
|
||||
|
||||
def main(file_path, color_threshold=30, min_percentage=0.5, dpi=300, output_folder='output'):
|
||||
"""Main function."""
|
||||
print("="*60)
|
||||
print("COLOR EXTRACTOR - Find Main Colors")
|
||||
print("="*60)
|
||||
|
||||
# Clear output
|
||||
clear_output_folder(output_folder)
|
||||
|
||||
# Load image
|
||||
print(f"\nInput: {file_path}")
|
||||
img = load_image(file_path, dpi)
|
||||
|
||||
# Save original
|
||||
original_path = os.path.join(output_folder, 'original.png')
|
||||
img.save(original_path)
|
||||
|
||||
# Find main colors
|
||||
color_groups = find_main_colors(img, color_threshold, min_percentage)
|
||||
|
||||
if len(color_groups) == 0:
|
||||
print("\nNo main colors found.")
|
||||
return
|
||||
|
||||
# Create color layers
|
||||
create_color_layers(img, color_groups, color_threshold, output_folder)
|
||||
|
||||
# Save results
|
||||
save_results(color_groups, output_folder)
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"✓ COMPLETE - Found {len(color_groups)} main colors")
|
||||
print(f" Created {len(color_groups)} color layer images")
|
||||
print(f"{'='*60}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Input file (PDF or image)
|
||||
file_path = "input.pdf" # or "input.png", "input.jpg", etc.
|
||||
|
||||
if not os.path.exists(file_path):
|
||||
print(f"Error: '{file_path}' not found!")
|
||||
print("Usage: Place your file as 'input.pdf' or 'input.png'")
|
||||
else:
|
||||
# Parameters:
|
||||
# color_threshold: Distance between colors to group them (0-441)
|
||||
# Distance = sqrt((R1-R2)² + (G1-G2)² + (B1-B2)²)
|
||||
# Examples:
|
||||
# RGB(0,0,0) to RGB(0,0,1) = distance of 1
|
||||
# RGB(0,0,0) to RGB(10,10,10) = distance of ~17
|
||||
# RGB(0,0,0) to RGB(30,30,30) = distance of ~52
|
||||
# Recommended values:
|
||||
# 10-20: Very strict - only very similar colors grouped
|
||||
# 30-50: Good for most diagrams (RECOMMENDED)
|
||||
# 60-100: Loose - more aggressive grouping
|
||||
#
|
||||
# min_percentage: Minimum % to be a "main" color
|
||||
# 0.5: Include colors that are at least 0.5% of image
|
||||
# 1.0: Only colors that are at least 1% of image
|
||||
# 0.1: Include even small but significant colors
|
||||
#
|
||||
# dpi: Resolution for PDF conversion (300 recommended)
|
||||
|
||||
main(
|
||||
file_path=file_path,
|
||||
color_threshold=120, # Group similar colors
|
||||
min_percentage=3, # Min 0.5% to be considered "main"
|
||||
dpi=300, # PDF resolution
|
||||
output_folder='output'
|
||||
)
|
||||
114
label/cores/file/QUICKSTART.md
Normal file
114
label/cores/file/QUICKSTART.md
Normal file
@@ -0,0 +1,114 @@
|
||||
# Quick Start Guide
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Basic Usage
|
||||
|
||||
### Extract layers from PDF
|
||||
```bash
|
||||
python layer_extractor.py diagram.pdf
|
||||
```
|
||||
|
||||
That's it! Layers will be saved to `output/` directory.
|
||||
|
||||
## Common Adjustments
|
||||
|
||||
### Problem: Colors bleeding between layers
|
||||
```bash
|
||||
# INCREASE tolerance (counter-intuitive but correct!)
|
||||
# Antialiasing creates intermediate colors that need higher tolerance
|
||||
python layer_extractor.py diagram.pdf -t 45
|
||||
```
|
||||
|
||||
### Problem: Layers mixing too much
|
||||
```bash
|
||||
# Decrease tolerance (stricter color matching)
|
||||
python layer_extractor.py diagram.pdf -t 20
|
||||
```
|
||||
|
||||
### Problem: Missing fine details
|
||||
```bash
|
||||
# Increase tolerance + higher DPI
|
||||
python layer_extractor.py diagram.pdf -t 40 --dpi 600
|
||||
```
|
||||
|
||||
### Problem: Too many layers detected
|
||||
```bash
|
||||
# Increase minimum pixel threshold
|
||||
python layer_extractor.py diagram.pdf -m 500
|
||||
```
|
||||
|
||||
### Problem: Need exact number of layers
|
||||
```bash
|
||||
# Specify layer count (extracts top N by frequency)
|
||||
python layer_extractor.py diagram.pdf -n 4
|
||||
```
|
||||
|
||||
### Problem: Low quality output
|
||||
```bash
|
||||
# Render at higher DPI
|
||||
python layer_extractor.py diagram.pdf --dpi 600
|
||||
```
|
||||
|
||||
## Output Files
|
||||
|
||||
Files are saved as:
|
||||
```
|
||||
output/diagram_layer1_220_050_050.png (Red layer)
|
||||
output/diagram_layer2_050_100_220.png (Blue layer)
|
||||
output/diagram_layer3_050_180_050.png (Green layer)
|
||||
```
|
||||
|
||||
The numbers in filename are RGB values of the layer color.
|
||||
|
||||
## Typical Workflows
|
||||
|
||||
### Standard diagram (moderate antialiasing)
|
||||
```bash
|
||||
python layer_extractor.py diagram.pdf
|
||||
# Use defaults - works for most cases
|
||||
```
|
||||
|
||||
### High-detail mechanical drawing
|
||||
```bash
|
||||
python layer_extractor.py drawing.pdf --dpi 600 -t 25
|
||||
# Higher resolution, tighter tolerance
|
||||
```
|
||||
|
||||
### Scanned/compressed diagram
|
||||
```bash
|
||||
python layer_extractor.py scanned.pdf -t 45
|
||||
# More lenient to handle artifacts
|
||||
```
|
||||
|
||||
### Known layer count
|
||||
```bash
|
||||
python layer_extractor.py diagram.pdf -n 3
|
||||
# Faster if you know there are 3 layers
|
||||
```
|
||||
|
||||
## Parameters Quick Reference
|
||||
|
||||
- `--dpi` - Resolution (default 300)
|
||||
- 150 = draft
|
||||
- 300 = standard
|
||||
- 600 = high quality
|
||||
|
||||
- `-t` - Tolerance (default 30, scale 0-100)
|
||||
- 15-20 = strict
|
||||
- 30 = balanced (RECOMMENDED)
|
||||
- 45-60 = lenient (for antialiasing)
|
||||
|
||||
- `-n` - Number of layers (default auto-detect)
|
||||
|
||||
- `-o` - Output directory (default "output")
|
||||
|
||||
## Getting Help
|
||||
|
||||
```bash
|
||||
python layer_extractor.py --help
|
||||
```
|
||||
181
label/cores/file/README.md
Normal file
181
label/cores/file/README.md
Normal file
@@ -0,0 +1,181 @@
|
||||
# PDF Layer Extractor for Industrial Diagrams
|
||||
|
||||
Extract colored layers from PDF industrial diagrams with white backgrounds. Automatically handles variable layer counts and antialiasing around text.
|
||||
|
||||
## Features
|
||||
|
||||
- **PDF Support**: Direct PDF processing at configurable DPI
|
||||
- **Automatic Layer Detection**: K-means clustering identifies distinct colored layers
|
||||
- **Handles Antialiasing**: Tolerates color mixing around text and fine details
|
||||
- **Variable Layer Counts**: Auto-detects all colored layers
|
||||
- **Strict White Filtering**: Pure white (255,255,255) treated as background only
|
||||
- **High Quality Output**: Each layer saved as transparent PNG
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Basic usage
|
||||
python layer_extractor.py diagram.pdf
|
||||
|
||||
# Higher resolution
|
||||
python layer_extractor.py diagram.pdf --dpi 600
|
||||
|
||||
# Extract to specific folder
|
||||
python layer_extractor.py diagram.pdf -o my_layers/
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Basic Command
|
||||
|
||||
```bash
|
||||
python layer_extractor.py diagram.pdf
|
||||
```
|
||||
|
||||
Output: `output/diagram_layer1_255_000_000.png`, `output/diagram_layer2_000_000_255.png`, etc.
|
||||
|
||||
### Common Options
|
||||
|
||||
```bash
|
||||
# High resolution rendering (better for detailed diagrams)
|
||||
python layer_extractor.py diagram.pdf --dpi 600
|
||||
|
||||
# Adjust color tolerance (for antialiasing issues)
|
||||
python layer_extractor.py diagram.pdf -t 40
|
||||
|
||||
# Extract only top 3 layers
|
||||
python layer_extractor.py diagram.pdf -n 3
|
||||
|
||||
# Custom output directory
|
||||
python layer_extractor.py diagram.pdf -o layers/
|
||||
```
|
||||
|
||||
## Parameters
|
||||
|
||||
- `--dpi` (default: 300) - PDF rendering resolution
|
||||
- 300: Standard quality, faster
|
||||
- 600: High quality, larger files
|
||||
- 150: Draft quality, quick preview
|
||||
|
||||
- `-t, --tolerance` (default: 30) - Color matching tolerance (0-100 scale)
|
||||
- **10-15**: Very strict, only nearly identical colors
|
||||
- **20-25**: Strict, minimal antialiasing
|
||||
- **30**: Default, handles moderate antialiasing (RECOMMENDED)
|
||||
- **40-50**: Lenient, good for heavy antialiasing around text
|
||||
- **60+**: Very lenient, may blur layer boundaries
|
||||
|
||||
- `-n, --n-layers` - Extract specific number of layers (default: auto-detect)
|
||||
|
||||
- `-m, --min-pixels` (default: 100) - Minimum pixels to consider a valid layer
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **PDF Rendering**: Converts PDF to high-resolution image at specified DPI
|
||||
2. **Color Analysis**: Uses K-means clustering on pixel colors
|
||||
3. **White Filtering**: Removes pure white background (RGB ≥ 250,250,250)
|
||||
4. **Layer Extraction**: For each color, creates a mask of similar pixels
|
||||
5. **Alpha Blending**: Handles antialiasing with gradient transparency
|
||||
6. **Output**: Saves each layer as transparent PNG
|
||||
|
||||
## Output Format
|
||||
|
||||
Files are named: `{pdf_name}_layer{N}_{R}_{G}_{B}.png`
|
||||
|
||||
Example:
|
||||
```
|
||||
output/
|
||||
├── piping_diagram_layer1_220_050_050.png (Red layer)
|
||||
├── piping_diagram_layer2_050_100_220.png (Blue layer)
|
||||
└── piping_diagram_layer3_050_180_050.png (Green layer)
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Colors bleeding between layers (antialiasing issue)
|
||||
|
||||
**Problem**: Gray pixels from antialiasing appearing in wrong layer, especially around black text on gray layers
|
||||
|
||||
**Explanation**: When black text (0,0,0) sits on a gray layer (150,150,150), antialiasing creates intermediate grays (75,75,75, 100,100,100, etc.) that are far from both black and gray in color space.
|
||||
|
||||
**Solution**: Increase tolerance to capture these intermediate colors
|
||||
```bash
|
||||
# For moderate antialiasing (default, usually works)
|
||||
python layer_extractor.py diagram.pdf -t 30
|
||||
|
||||
# For heavy antialiasing (small text, compressed PDFs)
|
||||
python layer_extractor.py diagram.pdf -t 45
|
||||
|
||||
# For extreme cases (very compressed or low quality)
|
||||
python layer_extractor.py diagram.pdf -t 60
|
||||
```
|
||||
|
||||
### Missing fine details
|
||||
|
||||
**Problem**: Thin lines or small text not captured
|
||||
|
||||
**Solution**: Increase tolerance or DPI
|
||||
```bash
|
||||
python layer_extractor.py diagram.pdf -t 40 --dpi 600
|
||||
```
|
||||
|
||||
### Too many layers detected
|
||||
|
||||
**Problem**: Small color artifacts creating extra layers
|
||||
|
||||
**Solution**: Increase minimum pixel threshold
|
||||
```bash
|
||||
python layer_extractor.py diagram.pdf -m 500
|
||||
```
|
||||
|
||||
### Blurry output
|
||||
|
||||
**Problem**: Output quality not good enough
|
||||
|
||||
**Solution**: Increase DPI
|
||||
```bash
|
||||
python layer_extractor.py diagram.pdf --dpi 600
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
### Standard industrial diagram
|
||||
```bash
|
||||
python layer_extractor.py electrical_schematic.pdf
|
||||
```
|
||||
|
||||
### High-detail mechanical drawing
|
||||
```bash
|
||||
python layer_extractor.py mechanical_drawing.pdf --dpi 600 -t 25
|
||||
```
|
||||
|
||||
### Diagram with known 4 layers
|
||||
```bash
|
||||
python layer_extractor.py hvac_diagram.pdf -n 4
|
||||
```
|
||||
|
||||
### Compressed/low-quality PDF
|
||||
```bash
|
||||
python layer_extractor.py scanned_diagram.pdf -t 50 --dpi 300
|
||||
```
|
||||
|
||||
## Tips
|
||||
|
||||
1. **Start with defaults** - They work for most diagrams
|
||||
2. **Check first** - Run once and review output before batch processing
|
||||
3. **DPI vs File Size** - Higher DPI = better quality but larger files
|
||||
4. **Tolerance tuning** - Adjust by ±5-10 at a time
|
||||
5. **Layer count** - Use `-n` if you know exact number for faster processing
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.7+
|
||||
- PyMuPDF (PDF rendering)
|
||||
- Pillow (image processing)
|
||||
- NumPy (array operations)
|
||||
- scikit-learn (color clustering)
|
||||
120
label/cores/file/SUMMARY.md
Normal file
120
label/cores/file/SUMMARY.md
Normal file
@@ -0,0 +1,120 @@
|
||||
# PDF Layer Extractor - Summary
|
||||
|
||||
## What It Does
|
||||
|
||||
Extracts colored layers from PDF industrial diagrams into separate transparent PNG files.
|
||||
|
||||
✓ Single PDF file processing
|
||||
✓ White background filtered (pure white only)
|
||||
✓ Variable number of layers (auto-detected)
|
||||
✓ Handles antialiasing around text
|
||||
✓ High-quality output at configurable DPI
|
||||
|
||||
## Quick Start
|
||||
|
||||
1. Install dependencies:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
2. Run on your PDF:
|
||||
```bash
|
||||
python layer_extractor.py your_diagram.pdf
|
||||
```
|
||||
|
||||
3. Find layers in `output/` folder
|
||||
|
||||
## Key Features
|
||||
|
||||
### Automatic Color Detection
|
||||
Uses K-means clustering to identify distinct colored layers. White (RGB ≥ 250) is treated as background only.
|
||||
|
||||
### Antialiasing Handling
|
||||
The tolerance parameter (default 30) handles color mixing:
|
||||
- Text antialiasing creates gray pixels around black text
|
||||
- Tolerance value captures these gradual color transitions
|
||||
- Each pixel gets alpha based on distance from target color
|
||||
|
||||
### Output Format
|
||||
Files named: `diagram_layerN_RRR_GGG_BBB.png`
|
||||
- Transparent PNG with only that color layer
|
||||
- RGB values in filename for reference
|
||||
|
||||
## Common Usage
|
||||
|
||||
```bash
|
||||
# Default (works for most diagrams)
|
||||
python layer_extractor.py diagram.pdf
|
||||
|
||||
# High quality
|
||||
python layer_extractor.py diagram.pdf --dpi 600
|
||||
|
||||
# Strict color separation (less antialiasing bleed)
|
||||
python layer_extractor.py diagram.pdf -t 20
|
||||
|
||||
# Lenient (more antialiasing tolerance)
|
||||
python layer_extractor.py diagram.pdf -t 40
|
||||
|
||||
# Extract top 3 layers only
|
||||
python layer_extractor.py diagram.pdf -n 3
|
||||
|
||||
# Custom output folder
|
||||
python layer_extractor.py diagram.pdf -o my_layers/
|
||||
```
|
||||
|
||||
## Parameters
|
||||
|
||||
| Parameter | Default | Description |
|
||||
|-----------|---------|-------------|
|
||||
| `--dpi` | 300 | PDF rendering resolution (150/300/600) |
|
||||
| `-t, --tolerance` | 30 | Color matching tolerance (15-50 typical) |
|
||||
| `-n, --n-layers` | auto | Number of layers to extract |
|
||||
| `-m, --min-pixels` | 100 | Minimum pixels for valid layer |
|
||||
| `-o, --output` | output | Output directory |
|
||||
|
||||
## Tolerance Guide
|
||||
|
||||
The tolerance parameter is key to handling antialiasing:
|
||||
|
||||
- **15-20**: Very strict, clean diagrams with no antialiasing
|
||||
- **30** (default): Balanced, handles moderate antialiasing
|
||||
- **40-50**: Lenient, for heavy antialiasing or compression artifacts
|
||||
|
||||
### Example: Gray Layer with Black Text
|
||||
|
||||
When you have a light gray layer with black text:
|
||||
- Black text creates gray antialiasing pixels
|
||||
- These gray pixels are close to the gray layer color
|
||||
- Higher tolerance includes them in the gray layer
|
||||
- Lower tolerance might miss them
|
||||
|
||||
Start with default (30) and adjust ±10 based on results.
|
||||
|
||||
## Files Included
|
||||
|
||||
1. **layer_extractor.py** - Main script
|
||||
2. **requirements.txt** - Dependencies (PyMuPDF, Pillow, numpy, scikit-learn)
|
||||
3. **README.md** - Full documentation
|
||||
4. **QUICKSTART.md** - Quick reference guide
|
||||
|
||||
## Technical Notes
|
||||
|
||||
- Uses PyMuPDF to render PDF at specified DPI
|
||||
- K-means clustering identifies dominant colors
|
||||
- Euclidean distance in RGB space for color matching
|
||||
- Alpha channel gradient for smooth edges
|
||||
- White detection: all RGB values ≥ 250
|
||||
|
||||
## Example Output
|
||||
|
||||
Input: `piping_diagram.pdf`
|
||||
Output:
|
||||
```
|
||||
output/
|
||||
├── piping_diagram_layer1_220_050_050.png (red piping)
|
||||
├── piping_diagram_layer2_050_100_220.png (blue electrical)
|
||||
├── piping_diagram_layer3_150_150_150.png (gray annotations)
|
||||
└── piping_diagram_layer4_050_180_050.png (green mechanical)
|
||||
```
|
||||
|
||||
Each PNG has transparent background with only that color layer visible.
|
||||
52
label/cores/file/UPDATE_NOTES.md
Normal file
52
label/cores/file/UPDATE_NOTES.md
Normal file
@@ -0,0 +1,52 @@
|
||||
# Tolerance Parameter Fix - Update Notes
|
||||
|
||||
## What Was Wrong
|
||||
|
||||
The original tolerance parameter used raw Euclidean distance in RGB space (0-255 scale), which was unintuitive:
|
||||
- Max possible distance in RGB = sqrt(3 × 255²) ≈ 441
|
||||
- A tolerance of "30" was actually very strict (only ~7% of max distance)
|
||||
- For antialiasing around text, you needed values like 150+ which wasn't obvious
|
||||
|
||||
## What's Fixed
|
||||
|
||||
**New Scale: 0-100** (percentage-based)
|
||||
- 0 = exact color match only
|
||||
- 30 = 30% of maximum color distance (default, RECOMMENDED)
|
||||
- 100 = maximum tolerance
|
||||
|
||||
**Why This Matters for Antialiasing:**
|
||||
|
||||
Example: Gray layer (150,150,150) with black text (0,0,0)
|
||||
- Antialiasing creates intermediate colors: (75,75,75), (100,100,100), (125,125,125)
|
||||
- Distance from gray (150,150,150) to (75,75,75) = sqrt(3 × 75²) ≈ 130
|
||||
- Old scale: You'd need tolerance ~130 (not intuitive)
|
||||
- New scale: tolerance 30-45 captures these (makes sense!)
|
||||
|
||||
## Updated Recommendations
|
||||
|
||||
```bash
|
||||
# Default - good for most diagrams
|
||||
python layer_extractor.py diagram.pdf -t 30
|
||||
|
||||
# Heavy antialiasing (small text, complex diagrams)
|
||||
python layer_extractor.py diagram.pdf -t 45
|
||||
|
||||
# Extreme antialiasing (compressed PDFs, low quality)
|
||||
python layer_extractor.py diagram.pdf -t 60
|
||||
|
||||
# Very strict (clean diagrams, no antialiasing)
|
||||
python layer_extractor.py diagram.pdf -t 15
|
||||
```
|
||||
|
||||
## Key Point
|
||||
|
||||
**If you see missing pixels around text or edges → INCREASE tolerance (not decrease!)**
|
||||
|
||||
The antialiased pixels are "far" from the target color in RGB space, so they need higher tolerance to be captured.
|
||||
|
||||
## Test Your Diagram
|
||||
|
||||
Start with default (30), then:
|
||||
- Missing pixels/gaps around text? → Try 45
|
||||
- Still missing details? → Try 60
|
||||
- Layers bleeding together? → Try 20
|
||||
418
label/cores/file/layer_extractor.py
Normal file
418
label/cores/file/layer_extractor.py
Normal file
@@ -0,0 +1,418 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
PDF Industrial Diagram Layer Extractor
|
||||
Extracts colored layers from PDF diagrams with white backgrounds.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from sklearn.cluster import KMeans
|
||||
from collections import Counter
|
||||
import argparse
|
||||
|
||||
try:
|
||||
import fitz # PyMuPDF
|
||||
except ImportError:
|
||||
print("Error: PyMuPDF not installed. Install with: pip install PyMuPDF")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def pdf_to_image(pdf_path, dpi=300):
|
||||
"""
|
||||
Convert PDF to PIL Image.
|
||||
|
||||
Args:
|
||||
pdf_path: Path to PDF file
|
||||
dpi: Resolution for rendering (default: 300)
|
||||
|
||||
Returns:
|
||||
PIL Image object
|
||||
"""
|
||||
print(f"Loading PDF: {pdf_path}")
|
||||
doc = fitz.open(pdf_path)
|
||||
|
||||
if len(doc) > 1:
|
||||
print(f" PDF has {len(doc)} pages, processing first page only")
|
||||
|
||||
# Get first page
|
||||
page = doc[0]
|
||||
|
||||
# Render page to image
|
||||
mat = fitz.Matrix(dpi/72, dpi/72) # Scale factor for DPI
|
||||
pix = page.get_pixmap(matrix=mat, alpha=False)
|
||||
|
||||
# Convert to PIL Image
|
||||
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
||||
|
||||
print(f" Rendered at {img.size[0]}x{img.size[1]} pixels ({dpi} DPI)")
|
||||
doc.close()
|
||||
|
||||
return img
|
||||
|
||||
|
||||
def get_dominant_colors(img, n_colors=15, sample_fraction=0.2):
|
||||
"""
|
||||
Identify dominant colors using KMeans clustering.
|
||||
|
||||
Args:
|
||||
img: PIL Image object
|
||||
n_colors: Maximum number of colors to detect
|
||||
sample_fraction: Fraction of pixels to sample
|
||||
|
||||
Returns:
|
||||
List of (color, pixel_count) tuples sorted by frequency
|
||||
"""
|
||||
print("Analyzing colors...")
|
||||
img_array = np.array(img)
|
||||
pixels = img_array.reshape(-1, 3)
|
||||
|
||||
# Sample pixels for speed
|
||||
if sample_fraction < 1.0:
|
||||
n_samples = int(len(pixels) * sample_fraction)
|
||||
indices = np.random.choice(len(pixels), n_samples, replace=False)
|
||||
sampled_pixels = pixels[indices]
|
||||
else:
|
||||
sampled_pixels = pixels
|
||||
|
||||
# KMeans clustering
|
||||
n_clusters = min(n_colors, len(np.unique(sampled_pixels, axis=0)))
|
||||
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
|
||||
kmeans.fit(sampled_pixels)
|
||||
|
||||
# Get colors and frequencies
|
||||
colors = kmeans.cluster_centers_.astype(int)
|
||||
labels = kmeans.predict(pixels)
|
||||
counts = Counter(labels)
|
||||
|
||||
# Sort by frequency
|
||||
color_counts = [(tuple(colors[i]), counts[i]) for i in range(len(colors))]
|
||||
color_counts.sort(key=lambda x: x[1], reverse=True)
|
||||
|
||||
return color_counts
|
||||
|
||||
|
||||
def is_white(color, threshold=250):
|
||||
"""Check if color is white (strict check for pure white background)."""
|
||||
return all(c >= threshold for c in color)
|
||||
|
||||
|
||||
def color_distance(color1, color2):
|
||||
"""Calculate Euclidean distance between two RGB colors."""
|
||||
return np.sqrt(sum((a - b) ** 2 for a, b in zip(color1, color2)))
|
||||
|
||||
|
||||
def merge_similar_colors(color_counts, similarity_threshold=40):
|
||||
"""
|
||||
Merge similar colors into groups to reduce layer fragmentation.
|
||||
|
||||
Args:
|
||||
color_counts: List of (color, pixel_count) tuples
|
||||
similarity_threshold: Maximum color distance to merge (0-441, default: 40)
|
||||
|
||||
Returns:
|
||||
List of merged (color, total_pixel_count) tuples
|
||||
"""
|
||||
if not color_counts:
|
||||
return []
|
||||
|
||||
merged = []
|
||||
used = set()
|
||||
|
||||
for i, (color1, count1) in enumerate(color_counts):
|
||||
if i in used:
|
||||
continue
|
||||
|
||||
# Start a new group with this color
|
||||
group_colors = [color1]
|
||||
group_count = count1
|
||||
used.add(i)
|
||||
|
||||
# Find similar colors to merge
|
||||
for j, (color2, count2) in enumerate(color_counts):
|
||||
if j <= i or j in used:
|
||||
continue
|
||||
|
||||
if color_distance(color1, color2) <= similarity_threshold:
|
||||
group_colors.append(color2)
|
||||
group_count += count2
|
||||
used.add(j)
|
||||
|
||||
# Use the average color of the group
|
||||
avg_color = tuple(int(np.mean([c[i] for c in group_colors])) for i in range(3))
|
||||
merged.append((avg_color, group_count))
|
||||
|
||||
# Sort by pixel count (most predominant first)
|
||||
merged.sort(key=lambda x: x[1], reverse=True)
|
||||
return merged
|
||||
|
||||
|
||||
def get_layer_region(img, target_color, tolerance=30):
|
||||
"""
|
||||
Get the bounding box region of a layer.
|
||||
|
||||
Args:
|
||||
img: PIL Image object
|
||||
target_color: RGB tuple of target color
|
||||
tolerance: Color matching tolerance
|
||||
|
||||
Returns:
|
||||
Tuple of (min_x, min_y, max_x, max_y, pixel_count) or None if no pixels found
|
||||
"""
|
||||
img_array = np.array(img).astype(np.float32)
|
||||
|
||||
# Calculate color distance for all pixels
|
||||
target = np.array(target_color, dtype=np.float32)
|
||||
distances = np.sqrt(np.sum((img_array - target) ** 2, axis=2))
|
||||
|
||||
# Scale tolerance
|
||||
max_distance = np.sqrt(3 * 255 ** 2)
|
||||
actual_tolerance = (tolerance / 100.0) * max_distance
|
||||
|
||||
# Find matching pixels
|
||||
mask = distances <= actual_tolerance
|
||||
|
||||
if not mask.any():
|
||||
return None
|
||||
|
||||
# Get coordinates of matching pixels
|
||||
y_coords, x_coords = np.where(mask)
|
||||
|
||||
if len(y_coords) == 0:
|
||||
return None
|
||||
|
||||
# Calculate bounding box
|
||||
min_x, max_x = x_coords.min(), x_coords.max()
|
||||
min_y, max_y = y_coords.min(), y_coords.max()
|
||||
pixel_count = mask.sum()
|
||||
|
||||
return (min_x, min_y, max_x, max_y, pixel_count)
|
||||
|
||||
|
||||
def extract_layer(img, target_color, tolerance=30, min_alpha=128):
|
||||
"""
|
||||
Extract a single colored layer.
|
||||
|
||||
Args:
|
||||
img: PIL Image object
|
||||
target_color: RGB tuple of target color
|
||||
tolerance: Per-channel color distance (0-100, where 100 = max tolerance)
|
||||
min_alpha: Minimum alpha value to keep (0-255, higher = less ghost pixels)
|
||||
|
||||
Returns:
|
||||
PIL Image with transparent background
|
||||
"""
|
||||
img_array = np.array(img).astype(np.float32)
|
||||
h, w, _ = img_array.shape
|
||||
|
||||
# Create output with alpha channel
|
||||
output = np.zeros((h, w, 4), dtype=np.uint8)
|
||||
|
||||
# Calculate color distance for all pixels
|
||||
target = np.array(target_color, dtype=np.float32)
|
||||
distances = np.sqrt(np.sum((img_array - target) ** 2, axis=2))
|
||||
|
||||
# Scale tolerance: max Euclidean distance in RGB is sqrt(3*255^2) ≈ 441
|
||||
# Map tolerance (0-100) to actual distance (0-441)
|
||||
# tolerance=30 maps to ~132 distance (good for moderate antialiasing)
|
||||
# tolerance=50 maps to ~220 distance (good for heavy antialiasing)
|
||||
max_distance = np.sqrt(3 * 255 ** 2) # ≈ 441.67
|
||||
actual_tolerance = (tolerance / 100.0) * max_distance
|
||||
|
||||
# Mask pixels within tolerance
|
||||
mask = distances <= actual_tolerance
|
||||
|
||||
# Calculate alpha with gradient based on distance
|
||||
alpha = np.clip(255 * (1 - distances / actual_tolerance), 0, 255).astype(np.uint8)
|
||||
|
||||
# Filter out ghost pixels: only keep pixels with alpha >= min_alpha
|
||||
strong_mask = mask & (alpha >= min_alpha)
|
||||
|
||||
# Copy matching pixels
|
||||
output[strong_mask, :3] = img_array[strong_mask].astype(np.uint8)
|
||||
output[strong_mask, 3] = alpha[strong_mask]
|
||||
|
||||
return Image.fromarray(output, 'RGBA')
|
||||
|
||||
|
||||
def process_pdf(pdf_path, output_dir='output', dpi=300, tolerance=30,
|
||||
min_pixels=100, n_layers=None, merge_threshold=40,
|
||||
show_regions=True, min_alpha=128):
|
||||
"""
|
||||
Process a PDF diagram and extract layers.
|
||||
|
||||
Args:
|
||||
pdf_path: Path to PDF file
|
||||
output_dir: Output directory for layers
|
||||
dpi: PDF rendering resolution
|
||||
tolerance: Color matching tolerance
|
||||
min_pixels: Minimum pixels for valid layer
|
||||
n_layers: Number of layers to extract (None = auto)
|
||||
merge_threshold: Color distance for merging similar layers (0-441)
|
||||
show_regions: Display bounding box regions for each layer
|
||||
min_alpha: Minimum alpha value to keep (0-255, removes ghost pixels)
|
||||
"""
|
||||
# Convert PDF to image
|
||||
img = pdf_to_image(pdf_path, dpi)
|
||||
total_pixels = img.size[0] * img.size[1]
|
||||
|
||||
# Detect colors
|
||||
color_counts = get_dominant_colors(img, n_colors=20)
|
||||
|
||||
# Filter out white background
|
||||
layer_colors = []
|
||||
print("\nDetected colors (before merging):")
|
||||
for color, count in color_counts:
|
||||
if is_white(color):
|
||||
print(f" RGB{color}: {count:,} pixels - WHITE BACKGROUND (skipped)")
|
||||
elif count >= min_pixels:
|
||||
percentage = (count / total_pixels) * 100
|
||||
layer_colors.append((color, count))
|
||||
print(f" RGB{color}: {count:,} pixels ({percentage:.1f}%)")
|
||||
|
||||
if not layer_colors:
|
||||
print("No colored layers found!")
|
||||
return
|
||||
|
||||
# Merge similar colors to reduce fragmentation
|
||||
print(f"\nMerging similar colors (threshold: {merge_threshold})...")
|
||||
merged_layers = merge_similar_colors(layer_colors, merge_threshold)
|
||||
|
||||
print(f"\nMerged layers (predominant first):")
|
||||
for color, count in merged_layers:
|
||||
percentage = (count / total_pixels) * 100
|
||||
print(f" RGB{color}: {count:,} pixels ({percentage:.1f}%)")
|
||||
|
||||
# Limit layers if specified
|
||||
if n_layers:
|
||||
merged_layers = merged_layers[:n_layers]
|
||||
print(f"\nKeeping top {n_layers} layers")
|
||||
|
||||
print(f"\n{len(merged_layers)} layers to extract")
|
||||
|
||||
# Create output directory
|
||||
base_name = os.path.splitext(os.path.basename(pdf_path))[0]
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# Analyze regions and extract layers
|
||||
if show_regions:
|
||||
print("\nAnalyzing layer regions...")
|
||||
|
||||
print("\nExtracting layers...")
|
||||
for idx, (color, count) in enumerate(merged_layers, 1):
|
||||
percentage = (count / total_pixels) * 100
|
||||
|
||||
if show_regions:
|
||||
# Get region information
|
||||
region = get_layer_region(img, color, tolerance)
|
||||
if region:
|
||||
min_x, min_y, max_x, max_y, pixel_count = region
|
||||
width = max_x - min_x + 1
|
||||
height = max_y - min_y + 1
|
||||
print(f" [{idx}/{len(merged_layers)}] RGB{color} - {count:,} px ({percentage:.1f}%)")
|
||||
print(f" Region: ({min_x},{min_y}) to ({max_x},{max_y}) - {width}x{height} px")
|
||||
else:
|
||||
print(f" [{idx}/{len(merged_layers)}] RGB{color} - {count:,} px ({percentage:.1f}%)")
|
||||
else:
|
||||
print(f" [{idx}/{len(merged_layers)}] RGB{color}...", end=' ')
|
||||
|
||||
# Extract layer
|
||||
layer_img = extract_layer(img, color, tolerance, min_alpha)
|
||||
|
||||
# Save with descriptive filename
|
||||
color_name = f"{color[0]:03d}_{color[1]:03d}_{color[2]:03d}"
|
||||
output_path = os.path.join(output_dir, f"{base_name}_layer{idx}_{color_name}.png")
|
||||
layer_img.save(output_path)
|
||||
|
||||
if not show_regions:
|
||||
print(f"✓ Saved")
|
||||
else:
|
||||
print(f" Saved: {output_path}")
|
||||
|
||||
print(f"\n✓ Complete! {len(merged_layers)} layers saved to: {output_dir}/")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Extract colored layers from PDF industrial diagrams',
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
epilog="""
|
||||
Examples:
|
||||
# Basic usage
|
||||
python layer_extractor.py diagram.pdf
|
||||
|
||||
# Custom output directory and DPI
|
||||
python layer_extractor.py diagram.pdf -o layers/ --dpi 600
|
||||
|
||||
# Adjust color tolerance
|
||||
python layer_extractor.py diagram.pdf -t 40
|
||||
|
||||
# Extract specific number of layers
|
||||
python layer_extractor.py diagram.pdf -n 5
|
||||
"""
|
||||
)
|
||||
|
||||
parser.add_argument('pdf', help='Input PDF file')
|
||||
parser.add_argument('-o', '--output', default='output',
|
||||
help='Output directory (default: output)')
|
||||
parser.add_argument('--dpi', type=int, default=300,
|
||||
help='PDF rendering DPI (default: 300, higher = better quality)')
|
||||
parser.add_argument('-t', '--tolerance', type=int, default=30,
|
||||
help='Color matching tolerance 0-100 (default: 30, higher = more lenient)')
|
||||
parser.add_argument('-n', '--n-layers', type=int,
|
||||
help='Extract exactly N layers (default: auto-detect all)')
|
||||
parser.add_argument('-m', '--min-pixels', type=int, default=100,
|
||||
help='Minimum pixels for valid layer (default: 100)')
|
||||
parser.add_argument('--merge', type=int, default=40,
|
||||
help='Color merge threshold 0-441 (default: 40, higher = more aggressive merging)')
|
||||
parser.add_argument('--min-alpha', type=int, default=128,
|
||||
help='Minimum alpha value 0-255 (default: 128, higher = remove more ghost pixels)')
|
||||
parser.add_argument('--no-regions', action='store_true',
|
||||
help='Disable region analysis output')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate tolerance range
|
||||
if not 0 <= args.tolerance <= 100:
|
||||
print(f"Error: Tolerance must be between 0-100 (got {args.tolerance})")
|
||||
return 1
|
||||
|
||||
# Validate min_alpha range
|
||||
if not 0 <= args.min_alpha <= 255:
|
||||
print(f"Error: min-alpha must be between 0-255 (got {args.min_alpha})")
|
||||
return 1
|
||||
|
||||
# Validate input
|
||||
if not os.path.isfile(args.pdf):
|
||||
print(f"Error: File not found: {args.pdf}")
|
||||
return 1
|
||||
|
||||
if not args.pdf.lower().endswith('.pdf'):
|
||||
print(f"Error: Input must be a PDF file")
|
||||
return 1
|
||||
|
||||
# Process PDF
|
||||
try:
|
||||
process_pdf(
|
||||
args.pdf,
|
||||
output_dir=args.output,
|
||||
dpi=args.dpi,
|
||||
tolerance=args.tolerance,
|
||||
min_pixels=args.min_pixels,
|
||||
n_layers=args.n_layers,
|
||||
merge_threshold=args.merge,
|
||||
show_regions=not args.no_regions,
|
||||
min_alpha=args.min_alpha
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"\n✗ Error: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
exit(main())
|
||||
4
label/cores/file/requirements.txt
Normal file
4
label/cores/file/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
PyMuPDF>=1.23.0
|
||||
Pillow>=10.0.0
|
||||
numpy>=1.24.0
|
||||
scikit-learn>=1.3.0
|
||||
376
label/cores/sobel.py
Normal file
376
label/cores/sobel.py
Normal file
@@ -0,0 +1,376 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
PDF Edge Detection with Color Grouping (Preserving Edge Segregation)
|
||||
|
||||
Input: input.pdf
|
||||
Output: output_sobel/ folder
|
||||
"""
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from pdf2image import convert_from_path
|
||||
import os
|
||||
import shutil
|
||||
from collections import Counter
|
||||
|
||||
|
||||
def clear_output_directory(output_dir):
|
||||
if os.path.exists(output_dir):
|
||||
shutil.rmtree(output_dir)
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
|
||||
def enhance_pastel_colors(image_bgr):
|
||||
"""
|
||||
Increase saturation of pastel colors, keep gray closer to black.
|
||||
"""
|
||||
hsv = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2HSV).astype(np.float32)
|
||||
h, s, v = cv2.split(hsv)
|
||||
|
||||
# Identify pastel colors
|
||||
pastel_mask = (v > 150) & (s < 100) & (s > 10)
|
||||
|
||||
# Identify gray
|
||||
gray_mask = (s <= 10)
|
||||
|
||||
# Boost saturation for pastels
|
||||
s[pastel_mask] = np.clip(s[pastel_mask] * 2.5, 0, 255)
|
||||
|
||||
# Darken grays
|
||||
v[gray_mask] = np.clip(v[gray_mask] * 0.3, 0, 255)
|
||||
|
||||
# Reconstruct
|
||||
hsv_enhanced = cv2.merge([h, s, v]).astype(np.uint8)
|
||||
result = cv2.cvtColor(hsv_enhanced, cv2.COLOR_HSV2BGR)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def sobel_edge_detection(image):
|
||||
"""Apply Sobel filter to detect edges."""
|
||||
# Quantize colors
|
||||
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
|
||||
h, s, v = cv2.split(hsv)
|
||||
|
||||
h_quantized = (h // 5) * 5
|
||||
s_quantized = (s // 64) * 64
|
||||
v_quantized = (v // 64) * 64
|
||||
|
||||
hsv_quantized = cv2.merge([h_quantized, s_quantized, v_quantized])
|
||||
image_quantized = cv2.cvtColor(hsv_quantized, cv2.COLOR_HSV2BGR)
|
||||
|
||||
# Apply Sobel
|
||||
b, g, r = cv2.split(image_quantized)
|
||||
|
||||
edges_b = np.sqrt(cv2.Sobel(b, cv2.CV_64F, 1, 0, ksize=3)**2 +
|
||||
cv2.Sobel(b, cv2.CV_64F, 0, 1, ksize=3)**2)
|
||||
edges_g = np.sqrt(cv2.Sobel(g, cv2.CV_64F, 1, 0, ksize=3)**2 +
|
||||
cv2.Sobel(g, cv2.CV_64F, 0, 1, ksize=3)**2)
|
||||
edges_r = np.sqrt(cv2.Sobel(r, cv2.CV_64F, 1, 0, ksize=3)**2 +
|
||||
cv2.Sobel(r, cv2.CV_64F, 0, 1, ksize=3)**2)
|
||||
|
||||
combined = np.sqrt(edges_b**2 + edges_g**2 + edges_r**2)
|
||||
combined = combined / (combined.max() + 1e-8)
|
||||
|
||||
edge_mask = (combined > 0.10).astype(np.uint8) * 255
|
||||
kernel = np.ones((2, 2), np.uint8)
|
||||
edge_mask = cv2.morphologyEx(edge_mask, cv2.MORPH_CLOSE, kernel)
|
||||
|
||||
# Create BGRA with original colors
|
||||
result = np.zeros((image.shape[0], image.shape[1], 4), dtype=np.uint8)
|
||||
result[edge_mask > 0, :3] = image[edge_mask > 0]
|
||||
result[edge_mask > 0, 3] = 255
|
||||
|
||||
# Remove white pixels
|
||||
white_mask = np.all(result[:, :, :3] > 240, axis=2)
|
||||
result[white_mask, 3] = 0
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def analyze_edge_colors(edge_img, edge_mask):
|
||||
"""
|
||||
Analyze if an edge has multiple distinct colors.
|
||||
|
||||
Returns:
|
||||
(has_multiple_colors, num_colors, dominant_hues)
|
||||
"""
|
||||
bgr = edge_img[:, :, :3]
|
||||
pixels = bgr[edge_mask]
|
||||
|
||||
# Filter white
|
||||
non_white = pixels[~np.all(pixels > 240, axis=1)]
|
||||
|
||||
if len(non_white) < 10:
|
||||
return False, 0, []
|
||||
|
||||
# Convert to HSV
|
||||
hsv = cv2.cvtColor(non_white.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV).reshape(-1, 3)
|
||||
|
||||
# Filter low saturation (gray)
|
||||
saturated_mask = hsv[:, 1] > 30
|
||||
saturated_hsv = hsv[saturated_mask]
|
||||
|
||||
if len(saturated_hsv) < 10:
|
||||
return False, 0, []
|
||||
|
||||
# Quantize hue into bins (every 10 degrees)
|
||||
hue_bins = (saturated_hsv[:, 0] // 10).astype(np.int32)
|
||||
|
||||
# Count occurrences
|
||||
unique_hues, counts = np.unique(hue_bins, return_counts=True)
|
||||
|
||||
# Filter significant hues (>5% of pixels)
|
||||
total = len(hue_bins)
|
||||
significant_mask = counts > (total * 0.05)
|
||||
significant_hues = unique_hues[significant_mask]
|
||||
|
||||
num_colors = len(significant_hues)
|
||||
|
||||
return num_colors > 1, num_colors, significant_hues.tolist()
|
||||
|
||||
|
||||
def split_edge_by_color(edges_bgra, edge_mask, labels, edge_id, num_colors):
|
||||
"""
|
||||
Split edge into multiple sub-edges based on color using K-means.
|
||||
|
||||
Returns:
|
||||
List of (sub_edge_image, cluster_id) tuples
|
||||
"""
|
||||
bgr = edges_bgra[:, :, :3]
|
||||
|
||||
# Get edge pixels
|
||||
y_coords, x_coords = np.where(edge_mask)
|
||||
edge_pixels = bgr[edge_mask]
|
||||
|
||||
# Filter white and convert to HSV
|
||||
non_white_mask = ~np.all(edge_pixels > 240, axis=1)
|
||||
valid_pixels = edge_pixels[non_white_mask]
|
||||
valid_y = y_coords[non_white_mask]
|
||||
valid_x = x_coords[non_white_mask]
|
||||
|
||||
if len(valid_pixels) < 10:
|
||||
# Return original edge
|
||||
edge_img = np.zeros_like(edges_bgra)
|
||||
edge_img[edge_mask] = edges_bgra[edge_mask]
|
||||
return [(edge_img, 0)]
|
||||
|
||||
# Convert to HSV for clustering (use only H and S)
|
||||
hsv = cv2.cvtColor(valid_pixels.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV).reshape(-1, 3)
|
||||
features = hsv[:, :2].astype(np.float32) # Hue and Saturation only
|
||||
|
||||
# K-means clustering
|
||||
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
|
||||
_, cluster_labels, centers = cv2.kmeans(features, num_colors, None, criteria, 3, cv2.KMEANS_PP_CENTERS)
|
||||
|
||||
cluster_labels = cluster_labels.flatten()
|
||||
|
||||
# Create sub-edges (keep them separate!)
|
||||
sub_edges = []
|
||||
|
||||
for cluster_id in range(num_colors):
|
||||
cluster_mask_1d = (cluster_labels == cluster_id)
|
||||
|
||||
# Create separate image for this sub-edge
|
||||
sub_edge_img = np.zeros_like(edges_bgra)
|
||||
cluster_y = valid_y[cluster_mask_1d]
|
||||
cluster_x = valid_x[cluster_mask_1d]
|
||||
sub_edge_img[cluster_y, cluster_x] = edges_bgra[cluster_y, cluster_x]
|
||||
|
||||
sub_edges.append((sub_edge_img, cluster_id))
|
||||
|
||||
return sub_edges
|
||||
|
||||
|
||||
def get_edge_mode_color(edge_img, edge_mask):
|
||||
"""
|
||||
Get the mode (most common) color of an edge.
|
||||
"""
|
||||
bgr = edge_img[:, :, :3]
|
||||
pixels = bgr[edge_mask]
|
||||
|
||||
# Filter white
|
||||
non_white = pixels[~np.all(pixels > 240, axis=1)]
|
||||
|
||||
if len(non_white) == 0:
|
||||
return None
|
||||
|
||||
# Convert to HSV
|
||||
hsv = cv2.cvtColor(non_white.reshape(-1, 1, 3), cv2.COLOR_BGR2HSV).reshape(-1, 3)
|
||||
|
||||
# Filter low saturation
|
||||
saturated_mask = (hsv[:, 1] > 30)
|
||||
saturated_pixels = non_white[saturated_mask]
|
||||
|
||||
if len(saturated_pixels) == 0:
|
||||
saturated_pixels = non_white
|
||||
|
||||
# Get mode color
|
||||
pixel_ints = (saturated_pixels[:, 0].astype(np.int32) +
|
||||
saturated_pixels[:, 1].astype(np.int32) * 256 +
|
||||
saturated_pixels[:, 2].astype(np.int32) * 65536)
|
||||
|
||||
mode_int = np.bincount(pixel_ints).argmax()
|
||||
|
||||
mode_color = np.array([
|
||||
mode_int % 256,
|
||||
(mode_int // 256) % 256,
|
||||
(mode_int // 65536) % 256
|
||||
], dtype=np.uint8)
|
||||
|
||||
return mode_color
|
||||
|
||||
|
||||
def process_and_group_edges(edges_bgra, color_threshold=30):
|
||||
"""
|
||||
Process edges: split multi-color edges, then group by color.
|
||||
Edges remain separate (segregated) even within groups.
|
||||
|
||||
Returns:
|
||||
List of (group_image, mode_color, edge_count) tuples
|
||||
"""
|
||||
alpha = edges_bgra[:, :, 3]
|
||||
|
||||
# Find connected components
|
||||
num_labels, labels = cv2.connectedComponents(alpha)
|
||||
|
||||
print(f" Found {num_labels - 1} edges")
|
||||
|
||||
if num_labels <= 1:
|
||||
return []
|
||||
|
||||
# Process each edge: split if multi-color
|
||||
all_edge_images = []
|
||||
|
||||
for edge_id in range(1, num_labels):
|
||||
edge_mask = (labels == edge_id)
|
||||
|
||||
if not np.any(edge_mask):
|
||||
continue
|
||||
|
||||
# Analyze colors
|
||||
has_multiple, num_colors, hues = analyze_edge_colors(edges_bgra, edge_mask)
|
||||
|
||||
if has_multiple:
|
||||
print(f" Edge {edge_id}: {num_colors} colors detected, splitting...")
|
||||
# Split into sub-edges
|
||||
sub_edges = split_edge_by_color(edges_bgra, edge_mask, labels, edge_id, num_colors)
|
||||
all_edge_images.extend(sub_edges)
|
||||
else:
|
||||
# Keep as single edge
|
||||
edge_img = np.zeros_like(edges_bgra)
|
||||
edge_img[edge_mask] = edges_bgra[edge_mask]
|
||||
all_edge_images.append((edge_img, 0))
|
||||
|
||||
print(f" Total edges after splitting: {len(all_edge_images)}")
|
||||
|
||||
# Get mode color for each edge
|
||||
edge_colors = []
|
||||
for edge_img, cluster_id in all_edge_images:
|
||||
edge_mask = edge_img[:, :, 3] > 0
|
||||
mode_color = get_edge_mode_color(edge_img, edge_mask)
|
||||
edge_colors.append(mode_color)
|
||||
|
||||
# Group by similar colors
|
||||
groups = []
|
||||
used_indices = set()
|
||||
|
||||
for i, mode_color in enumerate(edge_colors):
|
||||
if i in used_indices or mode_color is None:
|
||||
continue
|
||||
|
||||
# Start new group
|
||||
group_indices = [i]
|
||||
used_indices.add(i)
|
||||
|
||||
# Find similar edges
|
||||
for j, other_color in enumerate(edge_colors):
|
||||
if j in used_indices or other_color is None:
|
||||
continue
|
||||
|
||||
# Calculate color distance
|
||||
distance = np.linalg.norm(mode_color.astype(float) - other_color.astype(float))
|
||||
|
||||
if distance <= color_threshold:
|
||||
group_indices.append(j)
|
||||
used_indices.add(j)
|
||||
|
||||
# Create group image (edges remain separate!)
|
||||
group_img = np.zeros_like(edges_bgra)
|
||||
for idx in group_indices:
|
||||
edge_img, _ = all_edge_images[idx]
|
||||
mask = edge_img[:, :, 3] > 0
|
||||
group_img[mask] = edge_img[mask]
|
||||
|
||||
groups.append((group_img, mode_color, len(group_indices)))
|
||||
|
||||
print(f" Grouped into {len(groups)} color groups")
|
||||
|
||||
return groups
|
||||
|
||||
|
||||
def process_pdf(pdf_path, output_dir, dpi=200):
|
||||
clear_output_directory(output_dir)
|
||||
|
||||
print(f"Processing PDF: {pdf_path}")
|
||||
print(f"Converting at {dpi} DPI...\n")
|
||||
|
||||
images = convert_from_path(pdf_path, dpi=dpi)
|
||||
print(f"Total pages: {len(images)}\n")
|
||||
|
||||
for page_num, pil_image in enumerate(images, start=1):
|
||||
print(f"Page {page_num}/{len(images)}...")
|
||||
|
||||
# Convert to BGR
|
||||
image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
|
||||
|
||||
# Enhance pastel colors
|
||||
print(f" - Enhancing pastel colors...")
|
||||
enhanced_image = enhance_pastel_colors(image)
|
||||
|
||||
# Detect edges
|
||||
print(f" - Detecting edges...")
|
||||
edges = sobel_edge_detection(enhanced_image)
|
||||
|
||||
# Process and group edges
|
||||
print(f" - Processing and grouping edges by color...")
|
||||
groups = process_and_group_edges(edges, color_threshold=30)
|
||||
|
||||
# Save outputs
|
||||
base = f"page{page_num:03d}"
|
||||
|
||||
cv2.imwrite(os.path.join(output_dir, f"{base}_original.png"), image)
|
||||
cv2.imwrite(os.path.join(output_dir, f"{base}_enhanced.png"), enhanced_image)
|
||||
cv2.imwrite(os.path.join(output_dir, f"{base}_edges.png"), edges)
|
||||
|
||||
# Save each group
|
||||
for group_idx, (group_img, mode_color, edge_count) in enumerate(groups, start=1):
|
||||
path = os.path.join(output_dir, f"{base}_group{group_idx}.png")
|
||||
cv2.imwrite(path, group_img)
|
||||
print(f" Group {group_idx}: {edge_count} edges, mode color (BGR): {tuple(mode_color)}")
|
||||
|
||||
print(f" - Saved {len(groups)} group images\n")
|
||||
|
||||
print("Complete!")
|
||||
|
||||
|
||||
def main():
|
||||
pdf_path = "input.pdf"
|
||||
output_dir = "output_sobel"
|
||||
|
||||
if not os.path.exists(pdf_path):
|
||||
print(f"Error: '{pdf_path}' not found!")
|
||||
return 1
|
||||
|
||||
try:
|
||||
process_pdf(pdf_path, output_dir)
|
||||
return 0
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
Reference in New Issue
Block a user