Feat: Desacoplamento
This commit is contained in:
0
code/services/__init__.py
Normal file
0
code/services/__init__.py
Normal file
46
code/services/authorization.py
Normal file
46
code/services/authorization.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import json
|
||||
import time
|
||||
|
||||
from utils.langgraph_agent import RULES, run_agent
|
||||
|
||||
|
||||
async def evaluate_servico(servico: dict, guia: dict, file_content: str) -> dict:
    """Run the rules agent for one service item and summarise its verdict.

    Args:
        servico: Service entry; its ``codigoServico`` value selects the rule.
        guia: Request payload; its ``atendimento``, ``guia`` and ``historico``
            entries are forwarded to the agent (an empty dict when absent).
        file_content: Passed unchanged to ``run_agent``.

    Returns:
        A dict with ``codigoServico``, ``resultado``, ``agentOutput``,
        ``input_tokens``, ``output_tokens`` and ``tempoAgentSegundos``.
        ``resultado`` is ``"SKIPPED"`` (plus a ``motivo``) when the code has
        no entry in ``RULES``; otherwise ``"Aprovado"`` or ``"Reprovado"``.
        Agent failures are not raised: they are reported in ``agentOutput``
        as ``"ERROR: ..."`` with zero token counts.
    """
    codigo_servico_raw = str(servico.get("codigoServico", ""))
    # Only the digits of the service code take part in the RULES lookup.
    code = "".join(c for c in codigo_servico_raw if c.isdigit())

    if code not in RULES:
        # Same keys as the regular result so consumers see one schema.
        return {
            "codigoServico": codigo_servico_raw,
            "resultado": "SKIPPED",
            "motivo": f"Codigo '{code}' nao encontrado nas regras",
            "agentOutput": "",
            "input_tokens": 0,
            "output_tokens": 0,
            "tempoAgentSegundos": 0,
        }

    query_data = {
        "atendimento": guia.get("atendimento", {}),
        "guia": guia.get("guia", {}),
        "servico": servico,
        "historico": guia.get("historico", {}),
    }
    query = json.dumps(query_data, indent=2, ensure_ascii=False)

    # Monotonic clock: a wall-clock adjustment cannot skew the duration.
    started = time.perf_counter()
    try:
        result = await run_agent(query, code, file_content)
        agent_output = result["response"]
        input_tokens = result["input_tokens"]
        output_tokens = result["output_tokens"]
    except Exception as e:
        # Best-effort: a failing agent call becomes an error marker, not a crash.
        # NOTE(review): an error therefore ends up classified as "Reprovado"
        # below - confirm that this is the intended outcome.
        agent_output = f"ERROR: {str(e)}"
        input_tokens = 0
        output_tokens = 0

    # The verdict looks only at the letters of the output, case-insensitively,
    # so leading punctuation or markup before "Aprov..." is ignored.
    letters_only = "".join(c for c in agent_output.lower() if c.isalpha())
    resultado = "Aprovado" if letters_only.startswith("aprov") else "Reprovado"

    return {
        "codigoServico": codigo_servico_raw,
        "resultado": resultado,
        "agentOutput": agent_output,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "tempoAgentSegundos": round(time.perf_counter() - started, 2),
    }
|
||||
75
code/services/document_extractor.py
Normal file
75
code/services/document_extractor.py
Normal file
@@ -0,0 +1,75 @@
|
||||
import asyncio
|
||||
import io
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import boto3
|
||||
from PyPDF2 import PdfReader, PdfWriter
|
||||
|
||||
from utils.config import AWS_REGION
|
||||
from utils.secrets_manager import SECRETS
|
||||
|
||||
# S3 client for downloading input documents; it is created with the explicit
# access key pair read from SECRETS.
_s3_input = boto3.client(
    service_name="s3",
    region_name=AWS_REGION,
    aws_access_key_id=SECRETS["AWS_ACCESS_KEY"],
    aws_secret_access_key=SECRETS["AWS_SECRET_KEY"],
)

# Textract client; only the region is passed, no explicit credentials.
_textract = boto3.client(service_name="textract", region_name=AWS_REGION)
|
||||
|
||||
|
||||
def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
    """Split an ``s3://bucket/key`` URI into ``(bucket, key)``.

    The key is taken verbatim from the text after the bucket, so object keys
    containing ``?`` or ``#`` are preserved. (``urlparse`` would treat those
    characters as the start of a query string or fragment and truncate the
    key.)

    Args:
        s3_uri: URI of the form ``s3://<bucket>/<key>``.

    Returns:
        The bucket name and the object key, with leading slashes removed
        from the key.

    Raises:
        ValueError: If the scheme is not ``s3``, or the bucket or key is
            missing or malformed.
    """
    # urlparse is used only for scheme detection.
    if urlparse(s3_uri).scheme != "s3":
        raise ValueError(f"Not an S3 URI: {s3_uri}")

    _, separator, remainder = s3_uri.partition("://")
    bucket, _, raw_key = remainder.partition("/")
    key = raw_key.lstrip("/")

    # "?" / "#" in the bucket position means the URI had no real key path.
    bucket_is_malformed = "?" in bucket or "#" in bucket
    if not separator or not bucket or not key or bucket_is_malformed:
        raise ValueError(f"Invalid S3 URI: {s3_uri}")
    return bucket, key
|
||||
|
||||
|
||||
def _extract_text_from_textract_response(response: dict) -> str:
    """Return the text of all ``LINE`` blocks in *response*, newline-joined.

    A falsy response (``None`` or an empty dict) and a response without a
    ``"Blocks"`` entry both yield an empty string. Blocks of any other
    ``BlockType`` are ignored.
    """
    if response:
        line_texts = []
        for item in response.get("Blocks", []):
            if item["BlockType"] == "LINE":
                line_texts.append(item["Text"])
        return "\n".join(line_texts)
    return ""
|
||||
|
||||
|
||||
def _split_pdf_pages(pdf_bytes: bytes) -> list[bytes]:
    """Split a PDF into one standalone single-page PDF per page.

    Args:
        pdf_bytes: Raw bytes of the source PDF.

    Returns:
        The serialized single-page PDFs, in the source's page order.
    """

    def _as_single_page_pdf(page) -> bytes:
        # Write one page through a fresh writer into an in-memory buffer.
        single = PdfWriter()
        single.add_page(page)
        out = io.BytesIO()
        single.write(out)
        return out.getvalue()

    source = PdfReader(io.BytesIO(pdf_bytes))
    return [_as_single_page_pdf(page) for page in source.pages]
|
||||
|
||||
|
||||
def _textract_detect_bytes(file_bytes: bytes) -> str:
    """Send *file_bytes* to Textract text detection and return its line text.

    This is a blocking call on the module-level Textract client; the response
    is reduced to text by ``_extract_text_from_textract_response``.
    """
    document = {"Bytes": file_bytes}
    return _extract_text_from_textract_response(
        _textract.detect_document_text(Document=document)
    )
|
||||
|
||||
|
||||
async def extract_text_from_s3_document(bucket: str, key: str) -> tuple[str, int]:
    """Download an S3 object and extract its text with Textract.

    The handling is chosen from the key's file extension (case-insensitive):
    ``.png`` / ``.jpg`` / ``.jpeg`` are sent as a single document; ``.pdf`` is
    split into single-page PDFs that are sent concurrently and joined with
    newlines. Blocking calls run in worker threads via ``asyncio.to_thread``.

    Args:
        bucket: Name of the bucket holding the object.
        key: Object key; its suffix selects the handling.

    Returns:
        ``(text, page_count)``: ``page_count`` is 1 for images and the number
        of pages for PDFs. Any other extension yields ``("", 0)``.
    """

    def _download() -> bytes:
        return _s3_input.get_object(Bucket=bucket, Key=key)["Body"].read()

    payload = await asyncio.to_thread(_download)
    suffix = Path(key).suffix.lower()

    if suffix == ".pdf":
        single_pages = await asyncio.to_thread(_split_pdf_pages, payload)
        ocr_jobs = [
            asyncio.to_thread(_textract_detect_bytes, page)
            for page in single_pages
        ]
        page_texts = await asyncio.gather(*ocr_jobs)
        return "\n".join(page_texts), len(single_pages)

    if suffix in (".png", ".jpg", ".jpeg"):
        image_text = await asyncio.to_thread(_textract_detect_bytes, payload)
        return image_text, 1

    # Unsupported extension: nothing is extracted.
    return "", 0
|
||||
29
code/services/result_store.py
Normal file
29
code/services/result_store.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import asyncio
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
import boto3
|
||||
|
||||
from utils.config import AWS_REGION, OUTPUT_BUCKET, API_VERSION
|
||||
|
||||
# S3 client used by save_results for uploads; only the region is passed here,
# no explicit credentials.
_s3_output = boto3.client("s3", region_name=AWS_REGION)
|
||||
|
||||
|
||||
async def save_results(results: list[dict]) -> None:
    """Upload each result dict as a JSON object to the output bucket.

    Every entry is written to ``OUTPUT_BUCKET`` under
    ``{API_VERSION}/{codigoGuiaLocal}_{timestamp}.json``. The id is read from
    ``entry["guia"]["codigoGuiaLocal"]`` (``"unknown"`` when absent), and a
    single timestamp is shared by the whole batch. Uploads run concurrently
    in worker threads.

    Saving is best-effort: this function never raises because of a failed
    upload. Each failure is logged, and the remaining uploads still run to
    completion.

    Args:
        results: Result dicts to persist; an empty list is a no-op.
    """
    import logging  # function-scope import keeps this block self-contained

    logger = logging.getLogger(__name__)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    async def _save_one(guia_result: dict) -> None:
        numero_guia = guia_result.get("guia", {}).get("codigoGuiaLocal", "unknown")
        key = f"{API_VERSION}/{numero_guia}_{timestamp}.json"
        await asyncio.to_thread(
            _s3_output.put_object,
            Bucket=OUTPUT_BUCKET,
            Key=key,
            Body=json.dumps(guia_result, ensure_ascii=False),
            ContentType="application/json",
        )

    # return_exceptions=True lets every upload finish and hands back each
    # failure, instead of surfacing only the first one.
    outcomes = await asyncio.gather(
        *(_save_one(g) for g in results), return_exceptions=True
    )
    for position, outcome in enumerate(outcomes):
        if isinstance(outcome, BaseException):
            logger.error(
                "Failed to save result #%d to S3", position, exc_info=outcome
            )
|
||||
Reference in New Issue
Block a user