Adds api functionality

2026-02-04 13:29:15 -03:00
parent fd6756c507
commit 5717cdd254
6 changed files with 825 additions and 132 deletions
--- a/code/app.py
+++ b/code/app.py
@@ -1,156 +1,204 @@
-from fastapi import FastAPI,Header
+from fastapi import FastAPI
+from pydantic import BaseModel
 import uvicorn
-"""
-Simple LangGraph Agent using @tool decorator
+import boto3
+import json
+import time
+import io
+from pathlib import Path
+from urllib.parse import urlparse
+from PyPDF2 import PdfReader

-Clean implementation with decorator-based tool definitions.
-"""
+from utils.langgraph_agent import RULES, run_agent

-from typing import Annotated, TypedDict
-from langgraph.graph import StateGraph, START, END
-from langgraph.graph.message import add_messages
-from langgraph.prebuilt import ToolNode, tools_condition
-from langchain_core.messages import HumanMessage, SystemMessage
-from langchain_core.tools import tool
-from langchain_aws import ChatBedrock
 app = FastAPI()

-# Define tools using @tool decorator
-rules="""- Mulheres acima de 45 anos ou menopausada
- Homens com mais de 70 anos;
- Osteogênese imperfeita (para esta patologia, poderá haver a liberação de (02) dois 
-exames ao ano - cada 180 dias);
- RX com osteopenia ou fratura patológica;
- Antecedente pessoal de fratura após os 40 anos: punho, ombros, vértebras, quadril;
- Parente de primeiro grau com osteoporose.
- Mulheres com massa corporal <20kg/m2 ou peso < 57,8kg;
- Menopausa antes dos 45 anos ou hipogonasismo crônico (falência ovariana 
-precoce);
- Uso de glicocorticóides (>=7,5 prednizona/ dia equivalente por mais três meses, ou 
-presença de síndrome de cushing;
- Hiperparatireoidismo primário;
- Uso prolongado de anticonvulsivantes (< 10 anos);
- Síndrome de má absorção crônica ou desnutrição doenças inflamatória intestinal 
-(independente da causa: bariatrica, celiacos, intolerancia a lactose).
- Quimioterapia, se sobrevida esperada for longa (< 5 anos);
- Diminuição documentada de altura;
- Presença de cifose após menopausa.
- Imobilização prolongada"""
-SYSTEM_PROMPT="""
-You are a assisant, your job is to aprove or not a procedure based on the following rules:"""
-+rules+"""
-Your input will be a json, evaluate it based on the rules and return:
-Aproved: If one of the criteira is met
-Reproved: If not a single one of the criterias are met."""
-@tool
-def add(a: int, b: int) -> int:
-    """Add two numbers together.
-    
-    Args:
-        a: First number
-        b: Second number
-    """
-    return a + b
+AWS_REGION = "us-east-2"


-@tool
-def multiply(a: int, b: int) -> int:
-    """Multiply two numbers.
-    
-    Args:
-        a: First number
-        b: Second number
-    """
-    return a * b
+# --- S3 / Textract helpers ---
+
+def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
+    parsed = urlparse(s3_uri)
+    if parsed.scheme != "s3":
+        raise ValueError(f"Not an S3 URI: {s3_uri}")
+    bucket = parsed.netloc
+    key = parsed.path.lstrip("/")
+    if not bucket or not key:
+        raise ValueError(f"Invalid S3 URI: {s3_uri}")
+    return bucket, key


-@tool
-def get_word_length(word: str) -> int:
-    """Get the length of a word.
-    
-    Args:
-        word: The word to measure
-    """
-    return len(word)
+def get_s3_client():
+    return boto3.client("s3", region_name=AWS_REGION)


-@tool
-def search_info(topic: str) -> str:
-    """Search for information about a topic (mock implementation).
-    
-    Args:
-        topic: The topic to search for
-    """
-    # Mock response - replace with actual search/API
-    return f"Information about {topic}: This is a mock response. In production, this would return real data."
+def get_textract_client():
+    return boto3.client("textract", region_name=AWS_REGION)


-# Define agent state
-class AgentState(TypedDict):
-    messages: Annotated[list, add_messages]
+def get_pdf_page_count(pdf_bytes: bytes) -> int:
+    try:
+        return len(PdfReader(io.BytesIO(pdf_bytes)).pages)
+    except Exception:
+        return 1


-# Define tools list
-tools = [add, multiply, get_word_length, search_info]
-
-
-# Agent node
-def call_model(state: AgentState):
-    """Call the LLM with current state and tools."""
-    
-    model = ChatBedrock(
-        model_id="arn:aws:bedrock:us-east-2:232048051668:application-inference-profile/uy4xskop19zn",
-        region_name="us-east-2",
-        provider="anthropic"
+def extract_text_from_textract_response(response: dict) -> str:
+    if not response:
+        return ""
+    return "\n".join(
+        block["Text"] for block in response.get("Blocks", [])
+        if block["BlockType"] == "LINE"
    )
-    
-    model_with_tools = model.bind_tools(tools)
-    
-    messages = [
-        SystemMessage(content=SYSTEM_PROMPT)
-    ] + state["messages"]
-    
-    response = model_with_tools.invoke(messages)
-    
-    return {"messages": [response]}


-# Build the graph
-def create_agent():
-    """Create and compile the agent graph."""
-    
-    workflow = StateGraph(AgentState)
-    
-    # Add nodes
-    workflow.add_node("agent", call_model)
-    workflow.add_node("tools", ToolNode(tools))
-    
-    # Add edges
-    workflow.add_edge(START, "agent")
-    workflow.add_conditional_edges("agent", tools_condition)
-    workflow.add_edge("tools", "agent")
-    
-    return workflow.compile()
+def extract_text_from_s3_document(bucket: str, key: str) -> str:
+    s3 = get_s3_client()
+    textract = get_textract_client()
+    file_ext = Path(key).suffix.lower()

-
-# Main execution
-
-
-@app.post("/")
-async def root(json:str= Header(...)):
-    agent = create_agent()
-    query=json
-    result = agent.invoke(
-            {"messages": [HumanMessage(content=query)]},
-            config={"recursion_limit": 10}
+    if file_ext in [".png", ".jpg", ".jpeg"]:
+        response = textract.detect_document_text(
+            Document={"S3Object": {"Bucket": bucket, "Name": key}}
        )
-        
-    final_message = result["messages"][-1]
-    return {"status": "success", "message": final_message.content}
+        return extract_text_from_textract_response(response)
+
+    if file_ext == ".pdf":
+        obj = s3.get_object(Bucket=bucket, Key=key)
+        pdf_bytes = obj["Body"].read()
+        page_count = get_pdf_page_count(pdf_bytes)
+
+        if page_count > 1:
+            response = textract.start_document_text_detection(
+                DocumentLocation={"S3Object": {"Bucket": bucket, "Name": key}}
+            )
+            job_id = response["JobId"]
+            while True:
+                result = textract.get_document_text_detection(JobId=job_id)
+                status = result["JobStatus"]
+                if status == "SUCCEEDED":
+                    return extract_text_from_textract_response(result)
+                elif status == "FAILED":
+                    return ""
+                time.sleep(2)
+        else:
+            response = textract.detect_document_text(
+                Document={"S3Object": {"Bucket": bucket, "Name": key}}
+            )
+            return extract_text_from_textract_response(response)
+
+    return ""
+
+
+# --- Guia processing ---
+
+def process_guia(guia: dict) -> dict:
+    guia_code = guia.get("guia", {}).get("codigoGuiaLocal", "unknown")
+
+    # Step 1: Extract text from all anexos
+    anexos = guia.get("anexos", [])
+    all_extracted_texts = []
+
+    for anexo_idx, anexo in enumerate(anexos):
+        s3_uri = anexo.get("urlAnexo") or anexo.get("URLAnexo", "")
+        nome_arquivo = anexo.get("nomeArquivo", f"attachment_{anexo_idx}")
+
+        if not s3_uri or not s3_uri.startswith("s3://"):
+            anexo["textoExtraido"] = ""
+            continue
+
+        try:
+            bucket, key = parse_s3_uri(s3_uri)
+            extracted_text = extract_text_from_s3_document(bucket, key)
+        except Exception as e:
+            print(f"  Error extracting text from {nome_arquivo}: {e}")
+            extracted_text = ""
+
+        anexo["textoExtraido"] = extracted_text
+        all_extracted_texts.append(f"--- {nome_arquivo} ---\n{extracted_text}")
+
+    file_content = "\n\n".join(all_extracted_texts)
+
+    # Step 2: For each servico, run the agent
+    servicos = guia.get("servicos", [])
+    avaliacao_resultados = []
+
+    for servico in servicos:
+        codigo_servico_raw = str(servico.get("codigoServico", ""))
+        code = "".join(c for c in codigo_servico_raw if c.isdigit())
+
+        if code not in RULES:
+            avaliacao_resultados.append({
+                "codigoServico": codigo_servico_raw,
+                "resultado": "SKIPPED",
+                "motivo": f"Codigo '{code}' nao encontrado nas regras",
+                "agentOutput": ""
+            })
+            continue
+
+        query_data = {
+            "atendimento": guia.get("atendimento", {}),
+            "guia": guia.get("guia", {}),
+            "servico": servico,
+            "historico": guia.get("historico", {})
+        }
+        query = json.dumps(query_data, indent=2, ensure_ascii=False)
+
+        try:
+            agent_output = run_agent(query, code, file_content)
+        except Exception as e:
+            print(f"  Agent error for servico {codigo_servico_raw}: {e}")
+            agent_output = f"ERROR: {str(e)}"
+
+        avaliacao_resultados.append({
+            "codigoServico": codigo_servico_raw,
+            "resultado": "Aprovado" if "aprov" in "".join(c for c in agent_output.lower() if c.isalnum() or c == ' ') else "Reprovado",
+            "agentOutput": agent_output
+        })
+
+    guia["avaliacaoAgente"] = avaliacao_resultados
+    return guia
+
+
+# --- API models ---
+
+class ProcessRequest(BaseModel):
+    operadora: dict
+    guias: list[dict]
+
+
+# --- Endpoints ---
+
+@app.post("/process")
+async def process(request: ProcessRequest):
+    results = []
+    for idx, guia in enumerate(request.guias):
+        try:
+            enriched = process_guia(guia)
+            results.append(enriched)
+        except Exception as e:
+            results.append({
+                "error": str(e),
+                "guia": guia.get("guia", {}).get("codigoGuiaLocal", f"index_{idx}")
+            })
+
+    return {
+        "status": "success",
+        "operadora": request.operadora,
+        "guias": results
+    }
+

@app.get("/health")
 async def health():
    return {"status": "healthy"}

+
+@app.get("/rules")
+async def get_rules():
+    return {"codes": list(RULES.keys())}
+
+
 if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)