Initial commit
This commit is contained in:
181
label/infra/code/function_a.py
Normal file
181
label/infra/code/function_a.py
Normal file
@@ -0,0 +1,181 @@
|
||||
import boto3
|
||||
import os
|
||||
import tempfile
|
||||
import json
|
||||
from urllib.parse import urlparse
|
||||
from diagram_processor import DiagramProcessor
|
||||
|
||||
|
||||
def parse_s3_path(s3_path):
    """
    Split an S3 URI into its bucket and key components.

    Args:
        s3_path: S3 path like 's3://bucket-name/path/to/file.pdf'

    Returns:
        Tuple (bucket, key)

    Raises:
        ValueError: if the path does not use the 's3://' scheme.
    """
    if not s3_path.startswith('s3://'):
        raise ValueError(f"Invalid S3 path: {s3_path}. Must start with 's3://'")

    # urlparse puts the bucket in netloc and the object key (with a
    # leading '/') in path.
    parts = urlparse(s3_path)
    return parts.netloc, parts.path.lstrip('/')
|
||||
|
||||
|
||||
def download_from_s3(s3_path, local_path):
    """
    Fetch an object from S3 and store it at a local path.

    Args:
        s3_path: S3 path (s3://bucket/key)
        local_path: Local file path to save to
    """
    bucket_name, object_key = parse_s3_path(s3_path)
    client = boto3.client('s3')

    print(f"Downloading from S3: {s3_path}")
    client.download_file(bucket_name, object_key, local_path)
    print(f"Downloaded to: {local_path}")
|
||||
|
||||
|
||||
def _format_match(match):
    """Convert one raw match record into the clean output schema.

    Records carry either a single label ('vm_label', the default when
    'match_type' is absent) or a top/bottom label pair ('two_labels');
    each variant exposes a different set of text/bbox fields.
    """
    match_type = match.get('match_type', 'vm_label')

    if match_type == 'two_labels':
        return {
            'object_name': match['object_name'],
            'object_confidence': round(match['object_confidence'], 2),
            'match_type': match_type,
            'text_top': match['text_top'],
            'text_top_confidence': round(match['text_confidence_top'], 2),
            'text_bottom': match['text_bottom'],
            'text_bottom_confidence': round(match['text_confidence_bottom'], 2),
            'object_bbox': match['object_bbox'],
            'text_bbox_top': match['text_bbox_top'],
            'text_bbox_bottom': match['text_bbox_bottom'],
        }

    return {
        'object_name': match['object_name'],
        'object_confidence': round(match['object_confidence'], 2),
        'match_type': match_type,
        'text': match['text'],
        'text_confidence': round(match['text_confidence'], 2),
        'distance_pixels': round(match['distance_pixels'], 2),
        'object_bbox': match['object_bbox'],
        'text_bbox': match['text_bbox'],
    }


def execute(s3_path):
    """
    Function A - Process diagram from S3 and return matches only.

    Downloads the diagram into a temporary directory, runs
    DiagramProcessor over it, and returns a JSON-serializable summary
    of label/block matches.

    Args:
        s3_path: S3 path to diagram (e.g., 's3://my-bucket/diagrams/diagram.pdf')

    Returns:
        Dictionary with matches of labels and blocks on success, or a
        dictionary with 'status': 'error' and the error message if
        processing fails.
    """
    # Plain string: the original used an f-string with no placeholders.
    print("Function A - Diagram Processing")
    print(f"Input S3 path: {s3_path}")

    # The temp dir (downloaded input + processor output) is removed on exit.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Download diagram from S3
        bucket, key = parse_s3_path(s3_path)
        input_file = os.path.join(temp_dir, os.path.basename(key))
        download_from_s3(s3_path, input_file)

        # Create output directory for processing
        output_dir = os.path.join(temp_dir, 'output')
        os.makedirs(output_dir, exist_ok=True)

        print("\nInitializing DiagramProcessor...")
        processor = DiagramProcessor(
            region=os.environ.get('AWS_REGION', 'us-east-1'),
            # NOTE(review): the fallback ARN hard-codes one account and model
            # version; prefer making CUSTOM_LABELS_ARN a required setting.
            custom_labels_arn=os.environ.get('CUSTOM_LABELS_ARN', 'arn:aws:rekognition:us-east-1:173378533286:project/labels-valvula/version/labels-valvula.2025-11-24T15.44.16/1764009856090')
        )

        print("\nProcessing diagram...")
        try:
            results = processor.process_single_diagram(
                diagram_path=input_file,
                output_base_dir=output_dir,
                grid_size=(5, 5),
                overlap_percent=10,
                # Fixed: the original list contained r'\+' twice.
                keep_regex_list=[r'\+', r'.*[Xx].*', r'\*', r'\\'],
                min_confidence=80,
                custom_labels_confidence=60,
                iou_threshold=0.3,
                matching_max_distance=200
            )

            matching_results = results['matching_results']

            # Format matches for clean output
            formatted_matches = [
                _format_match(match) for match in matching_results['matches']
            ]

            # Format unmatched objects
            unmatched_objects = [
                {
                    'name': obj['Name'],
                    'confidence': round(obj['Confidence'], 2),
                    'bbox': obj['global_bbox']
                }
                for obj in matching_results['unmatched_objects']
            ]

            # Format unmatched texts
            unmatched_texts = [
                {
                    'text': text['text'],
                    'confidence': round(text['confidence'], 2),
                    'bbox': text['global_bbox']
                }
                for text in matching_results['unmatched_texts']
            ]

            response = {
                'status': 'success',
                'input_s3_path': s3_path,
                'summary': {
                    'total_matches': len(formatted_matches),
                    'unmatched_objects': len(unmatched_objects),
                    'unmatched_texts': len(unmatched_texts),
                    'matching_rate': f"{matching_results['matching_rate']*100:.1f}%"
                },
                'matches': formatted_matches,
                'unmatched_objects': unmatched_objects,
                'unmatched_texts': unmatched_texts
            }

            print("\n" + "="*80)
            print("PROCESSING COMPLETE")
            print("="*80)
            print(f"Total matches: {len(formatted_matches)}")
            print(f"Matching rate: {matching_results['matching_rate']*100:.1f}%")
            print(f"Unmatched objects: {len(unmatched_objects)}")
            print(f"Unmatched texts: {len(unmatched_texts)}")

            return response

        except Exception as e:
            # Top-level boundary: return a structured error payload instead
            # of raising, so callers always receive a response dict.
            error_message = f"Error processing diagram: {str(e)}"
            print(error_message)
            import traceback
            traceback.print_exc()

            return {
                'status': 'error',
                'error': error_message,
                'input_s3_path': s3_path
            }
|
||||
Reference in New Issue
Block a user