Feat: Desacoplamento

This commit is contained in:
2026-03-12 15:02:26 -03:00
parent 08be8e314d
commit 8c3c23c65f
11 changed files with 231 additions and 163 deletions

View File

View File

@@ -0,0 +1,46 @@
import json
import time
from utils.langgraph_agent import RULES, run_agent
async def evaluate_servico(servico: dict, guia: dict, file_content: str) -> dict:
    """Evaluate a single service entry against the rule set via the LLM agent.

    Args:
        servico: Service dict; ``codigoServico`` selects which rule applies.
        guia: Guide dict providing ``atendimento``, ``guia`` and ``historico``
            context for the agent query.
        file_content: Extracted document text passed through to the agent.

    Returns:
        A result dict containing the original service code, the verdict
        (``SKIPPED`` / ``Aprovado`` / ``Reprovado``), the raw agent output,
        token counts, and elapsed agent time in seconds.
    """
    codigo_servico_raw = str(servico.get("codigoServico", ""))
    # Normalize to digits only so formatting characters (dots, dashes) in the
    # incoming code don't prevent a match against RULES keys.
    code = "".join(c for c in codigo_servico_raw if c.isdigit())
    if code not in RULES:
        return {
            "codigoServico": codigo_servico_raw,
            "resultado": "SKIPPED",
            "motivo": f"Codigo '{code}' nao encontrado nas regras",
            "agentOutput": "",
            # Zero token counts keep the SKIPPED schema consistent with the
            # evaluated path below (previously these keys were absent).
            "input_tokens": 0,
            "output_tokens": 0,
            "tempoAgentSegundos": 0,
        }
    query_data = {
        "atendimento": guia.get("atendimento", {}),
        "guia": guia.get("guia", {}),
        "servico": servico,
        "historico": guia.get("historico", {}),
    }
    query = json.dumps(query_data, indent=2, ensure_ascii=False)
    # perf_counter is monotonic: immune to system clock adjustments,
    # unlike time.time().
    t0 = time.perf_counter()
    try:
        result = await run_agent(query, code, file_content)
        agent_output = result["response"]
        input_tokens = result["input_tokens"]
        output_tokens = result["output_tokens"]
    except Exception as e:
        # Best-effort: surface the failure in this service's result instead
        # of aborting evaluation of the remaining services.
        agent_output = f"ERROR: {str(e)}"
        input_tokens = 0
        output_tokens = 0
    # The agent's verdict is free text; strip to letters so leading
    # punctuation/whitespace can't break the "aprov..." prefix check.
    verdict_text = "".join(c for c in agent_output.lower() if c.isalpha())
    return {
        "codigoServico": codigo_servico_raw,
        "resultado": "Aprovado" if verdict_text.startswith("aprov") else "Reprovado",
        "agentOutput": agent_output,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "tempoAgentSegundos": round(time.perf_counter() - t0, 2),
    }

View File

@@ -0,0 +1,75 @@
import asyncio
import io
from pathlib import Path
from urllib.parse import urlparse
import boto3
from PyPDF2 import PdfReader, PdfWriter
from utils.config import AWS_REGION
from utils.secrets_manager import SECRETS
# S3 client for the input bucket; configured with explicit credentials from
# the secrets store rather than the default AWS credential chain.
_s3_input = boto3.client(
    "s3",
    aws_access_key_id=SECRETS["AWS_ACCESS_KEY"],
    aws_secret_access_key=SECRETS["AWS_SECRET_KEY"],
    region_name=AWS_REGION,
)
# Textract client; no explicit keys, so it uses the default credential chain.
_textract = boto3.client("textract", region_name=AWS_REGION)
def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
    """Split an ``s3://bucket/key`` URI into its ``(bucket, key)`` parts.

    Raises:
        ValueError: if the URI does not use the ``s3`` scheme, or if either
            the bucket or the key component is empty.
    """
    parts = urlparse(s3_uri)
    if parts.scheme != "s3":
        raise ValueError(f"Not an S3 URI: {s3_uri}")
    bucket, key = parts.netloc, parts.path.lstrip("/")
    if bucket and key:
        return bucket, key
    raise ValueError(f"Invalid S3 URI: {s3_uri}")
def _extract_text_from_textract_response(response: dict) -> str:
if not response:
return ""
return "\n".join(
block["Text"] for block in response.get("Blocks", [])
if block["BlockType"] == "LINE"
)
def _split_pdf_pages(pdf_bytes: bytes) -> list[bytes]:
    """Re-serialize each page of a PDF as its own single-page PDF.

    NOTE(review): pages appear to be split so the caller can OCR them
    individually — confirm the Textract sync API requires single pages.
    """
    def _page_to_bytes(page) -> bytes:
        # Purpose: wrap one page in a fresh writer and serialize it.
        writer = PdfWriter()
        writer.add_page(page)
        out = io.BytesIO()
        writer.write(out)
        return out.getvalue()

    reader = PdfReader(io.BytesIO(pdf_bytes))
    return [_page_to_bytes(p) for p in reader.pages]
def _textract_detect_bytes(file_bytes: bytes) -> str:
    """Run synchronous Textract text detection on raw document bytes."""
    raw_response = _textract.detect_document_text(Document={"Bytes": file_bytes})
    return _extract_text_from_textract_response(raw_response)
async def extract_text_from_s3_document(bucket: str, key: str) -> tuple[str, int]:
    """Download a document from S3 and extract its text with Textract.

    Returns:
        ``(text, page_count)``. Images (.png/.jpg/.jpeg) count as one page;
        PDFs are split and OCRed per page; any other extension yields
        ``("", 0)``.
    """
    def _download() -> bytes:
        return _s3_input.get_object(Bucket=bucket, Key=key)["Body"].read()

    payload = await asyncio.to_thread(_download)
    extension = Path(key).suffix.lower()

    if extension in (".png", ".jpg", ".jpeg"):
        return await asyncio.to_thread(_textract_detect_bytes, payload), 1

    if extension == ".pdf":
        pages = await asyncio.to_thread(_split_pdf_pages, payload)
        # One Textract call per page, all in flight concurrently on threads.
        page_texts = await asyncio.gather(
            *(asyncio.to_thread(_textract_detect_bytes, page) for page in pages)
        )
        return "\n".join(page_texts), len(pages)

    # Unsupported extension: silently report no text / no pages.
    return "", 0

View File

@@ -0,0 +1,29 @@
import asyncio
import json
import logging
from datetime import datetime

import boto3

from utils.config import AWS_REGION, OUTPUT_BUCKET, API_VERSION
# S3 client for the results bucket; uses the default AWS credential chain.
_s3_output = boto3.client("s3", region_name=AWS_REGION)
async def save_results(results: list[dict]) -> None:
    """Persist each guide result as a JSON object in the output bucket.

    Each result is uploaded to ``{API_VERSION}/{numero_guia}_{timestamp}.json``.
    Uploads run concurrently; persistence is best-effort — individual failures
    are logged and never propagate to the caller.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    async def _save_one(guia_result: dict) -> None:
        # Purpose: upload one result document to S3 off the event loop.
        numero_guia = guia_result.get("guia", {}).get("codigoGuiaLocal", "unknown")
        key = f"{API_VERSION}/{numero_guia}_{timestamp}.json"
        await asyncio.to_thread(
            _s3_output.put_object,
            Bucket=OUTPUT_BUCKET,
            Key=key,
            Body=json.dumps(guia_result, ensure_ascii=False),
            ContentType="application/json",
        )

    # return_exceptions=True so one failed upload doesn't abort the others.
    # Previously a bare `except Exception: pass` swallowed ALL failures
    # silently; keep the best-effort semantics but log which guides failed.
    outcomes = await asyncio.gather(
        *(_save_one(g) for g in results), return_exceptions=True
    )
    logger = logging.getLogger(__name__)
    for guia_result, outcome in zip(results, outcomes):
        if isinstance(outcome, Exception):
            numero = guia_result.get("guia", {}).get("codigoGuiaLocal", "unknown")
            logger.error("Failed to save result for guia %s: %s", numero, outcome)