181 lines
6.5 KiB
Python
181 lines
6.5 KiB
Python
import boto3
|
|
import os
|
|
import tempfile
|
|
import json
|
|
from urllib.parse import urlparse
|
|
from diagram_processor import DiagramProcessor
|
|
|
|
|
|
def parse_s3_path(s3_path):
    """
    Parse S3 path into bucket and key

    The path is split by hand instead of with a URL parser: S3 object keys
    may contain '?' and '#', which a URL parser interprets as the start of a
    query string or fragment and would therefore cut off the end of the key.

    Args:
        s3_path: S3 path like 's3://bucket-name/path/to/file.pdf'

    Returns:
        Tuple (bucket, key). Leading slashes are stripped from the key; the
        key is an empty string when the path names only a bucket.

    Raises:
        ValueError: If the path does not start with 's3://' or contains no
            bucket name.
    """
    prefix = 's3://'
    if not s3_path.startswith(prefix):
        raise ValueError(f"Invalid S3 path: {s3_path}. Must start with 's3://'")

    # Everything up to the first '/' is the bucket; the remainder is the key,
    # kept verbatim so that characters such as '?' and '#' survive.
    bucket, _, key = s3_path[len(prefix):].partition('/')
    if not bucket:
        raise ValueError(f"Invalid S3 path: {s3_path}. Bucket name is missing")

    return bucket, key.lstrip('/')
|
|
|
|
|
|
def download_from_s3(s3_path, local_path):
    """
    Fetch a single object from S3 and store it on the local filesystem.

    Progress is reported on stdout before and after the transfer.

    Args:
        s3_path: Location of the source object (s3://bucket/key)
        local_path: Local file path the object is written to
    """
    source_bucket, object_key = parse_s3_path(s3_path)
    client = boto3.client('s3')

    print(f"Downloading from S3: {s3_path}")
    client.download_file(source_bucket, object_key, local_path)
    print(f"Downloaded to: {local_path}")
|
|
|
|
|
|
def _format_match(match):
    """Return the rounded, caller-facing view of one entry of the matcher's 'matches' list."""
    match_type = match.get('match_type', 'vm_label')

    if match_type == 'two_labels':
        # Two-label matches carry a separate text/confidence/bbox for the
        # top and bottom label.
        return {
            'object_name': match['object_name'],
            'object_confidence': round(match['object_confidence'], 2),
            'match_type': match_type,
            'text_top': match['text_top'],
            'text_top_confidence': round(match['text_confidence_top'], 2),
            'text_bottom': match['text_bottom'],
            'text_bottom_confidence': round(match['text_confidence_bottom'], 2),
            'object_bbox': match['object_bbox'],
            'text_bbox_top': match['text_bbox_top'],
            'text_bbox_bottom': match['text_bbox_bottom'],
        }

    # Every other match type is reported as a single label plus its distance.
    return {
        'object_name': match['object_name'],
        'object_confidence': round(match['object_confidence'], 2),
        'match_type': match_type,
        'text': match['text'],
        'text_confidence': round(match['text_confidence'], 2),
        'distance_pixels': round(match['distance_pixels'], 2),
        'object_bbox': match['object_bbox'],
        'text_bbox': match['text_bbox'],
    }


def _format_unmatched_object(obj):
    """Return the caller-facing view of one entry of 'unmatched_objects'."""
    return {
        'name': obj['Name'],
        'confidence': round(obj['Confidence'], 2),
        'bbox': obj['global_bbox'],
    }


def _format_unmatched_text(text):
    """Return the caller-facing view of one entry of 'unmatched_texts'."""
    return {
        'text': text['text'],
        'confidence': round(text['confidence'], 2),
        'bbox': text['global_bbox'],
    }


def execute(s3_path):
    """
    Function A - Process diagram from S3 and return matches only

    The diagram is downloaded into a temporary directory, run through
    DiagramProcessor.process_single_diagram, and the 'matching_results'
    part of the output is reduced to a compact dictionary. The temporary
    directory (download and processing output) is removed on return.

    Args:
        s3_path: S3 path to diagram (e.g., 's3://my-bucket/diagrams/diagram.pdf')

    Returns:
        Dictionary with matches of labels and blocks. On success it has
        'status': 'success', 'input_s3_path', a 'summary' with counts and
        the matching rate as a percentage string, and the lists 'matches',
        'unmatched_objects' and 'unmatched_texts'. If processing or
        formatting raises, a dictionary with 'status': 'error', 'error'
        and 'input_s3_path' is returned instead.

    Raises:
        Errors raised while parsing the path, downloading the file or
        constructing the processor are not caught here and propagate to
        the caller (e.g. ValueError from parse_s3_path).
    """
    print("Function A - Diagram Processing")
    print(f"Input S3 path: {s3_path}")

    # Create temporary directory
    with tempfile.TemporaryDirectory() as temp_dir:
        # Download diagram from S3. Only the key is needed here, to name the
        # local copy; download_from_s3 parses the path again on its own.
        _, key = parse_s3_path(s3_path)
        input_file = os.path.join(temp_dir, os.path.basename(key))
        download_from_s3(s3_path, input_file)

        # Create output directory for processing
        output_dir = os.path.join(temp_dir, 'output')
        os.makedirs(output_dir, exist_ok=True)

        # Initialize processor
        print("\nInitializing DiagramProcessor...")
        # NOTE(review): the fallback ARN is account-specific; confirm it is
        # intended as a default rather than requiring CUSTOM_LABELS_ARN.
        processor = DiagramProcessor(
            region=os.environ.get('AWS_REGION', 'us-east-1'),
            custom_labels_arn=os.environ.get('CUSTOM_LABELS_ARN', 'arn:aws:rekognition:us-east-1:173378533286:project/labels-valvula/version/labels-valvula.2025-11-24T15.44.16/1764009856090')
        )

        # Process diagram
        print("\nProcessing diagram...")
        try:
            results = processor.process_single_diagram(
                diagram_path=input_file,
                output_base_dir=output_dir,
                grid_size=(5, 5),
                overlap_percent=10,
                # NOTE(review): r'\+' is listed twice; kept as-is because the
                # intended second pattern cannot be determined from here.
                keep_regex_list=[r'\+', r'\+', r'.*[Xx].*', r'\*', r'\\'],
                min_confidence=80,
                custom_labels_confidence=60,
                iou_threshold=0.3,
                matching_max_distance=200
            )

            # Extract only the matches
            matching_results = results['matching_results']

            # Format matches and leftovers for clean output
            formatted_matches = [
                _format_match(match) for match in matching_results['matches']
            ]
            unmatched_objects = [
                _format_unmatched_object(obj)
                for obj in matching_results['unmatched_objects']
            ]
            unmatched_texts = [
                _format_unmatched_text(text)
                for text in matching_results['unmatched_texts']
            ]

            # Rendered once and reused for both the response and the log line.
            matching_rate = f"{matching_results['matching_rate']*100:.1f}%"

            # Prepare response
            response = {
                'status': 'success',
                'input_s3_path': s3_path,
                'summary': {
                    'total_matches': len(formatted_matches),
                    'unmatched_objects': len(unmatched_objects),
                    'unmatched_texts': len(unmatched_texts),
                    'matching_rate': matching_rate
                },
                'matches': formatted_matches,
                'unmatched_objects': unmatched_objects,
                'unmatched_texts': unmatched_texts
            }

            print("\n" + "="*80)
            print("PROCESSING COMPLETE")
            print("="*80)
            print(f"Total matches: {len(formatted_matches)}")
            print(f"Matching rate: {matching_rate}")
            print(f"Unmatched objects: {len(unmatched_objects)}")
            print(f"Unmatched texts: {len(unmatched_texts)}")

            return response

        except Exception as e:
            # Top-level boundary: any processing or formatting failure is
            # reported to the caller as an error payload instead of raising.
            error_message = f"Error processing diagram: {str(e)}"
            print(error_message)
            # Imported locally so the block does not rely on a module-level
            # import of traceback.
            import traceback
            traceback.print_exc()

            return {
                'status': 'error',
                'error': error_message,
                'input_s3_path': s3_path
            }