From a4d9583821970269f54e7ac1c16e510ff8745567 Mon Sep 17 00:00:00 2001 From: DNXBrasil Date: Mon, 16 Mar 2026 10:59:51 -0300 Subject: [PATCH] Feat: Pulumi S3 --- .gitignore | 2 +- back/app/backend/config.py | 3 +- back/app/backend/dynamo.py | 41 ++----- back/app/backend/orquestrador.py | 13 +-- back/app/backend/tools.py | 100 ++++-------------- back/app/front.py | 2 +- back/requirements.txt | 10 +- ...umi.ifsp-assistente-matricula-ecs-alb.yaml | 28 ++--- infra/s3/Pulumi.assistente-matricula.yaml | 9 -- .../s3/Pulumi.ifsp-assistente-matricula.yaml | 21 ++++ infra/s3/{main.py => __main__.py} | 2 +- 11 files changed, 74 insertions(+), 157 deletions(-) delete mode 100644 infra/s3/Pulumi.assistente-matricula.yaml create mode 100644 infra/s3/Pulumi.ifsp-assistente-matricula.yaml rename infra/s3/{main.py => __main__.py} (84%) diff --git a/.gitignore b/.gitignore index 3cb28fa..4c8727d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,4 @@ venv/ .env .env.* pyproject.toml - +Makefile \ No newline at end of file diff --git a/back/app/backend/config.py b/back/app/backend/config.py index 539d24d..a025ed9 100644 --- a/back/app/backend/config.py +++ b/back/app/backend/config.py @@ -1,6 +1,5 @@ import os -TABLE = os.environ["TABLE"] REGION = os.environ["REGION"] AWS_ACCOUNT = os.environ["AWS_ACCOUNT"] -SECRET_NAME = os.environ["SECRET_NAME"] +KNOWLEDGE_BASE_ID = os.environ["KNOWLEDGE_BASE_ID"] diff --git a/back/app/backend/dynamo.py b/back/app/backend/dynamo.py index 61e8a59..749bea3 100644 --- a/back/app/backend/dynamo.py +++ b/back/app/backend/dynamo.py @@ -4,9 +4,8 @@ import os from botocore.exceptions import ClientError from langfuse import Langfuse -from .config import REGION, TABLE, SECRET_NAME +from .config import REGION, SECRET_NAME -dynamodb = boto3.resource("dynamodb", region_name=REGION) def get_secret() -> str: @@ -19,35 +18,9 @@ def get_secret() -> str: return response["SecretString"] -secrets = json.loads(get_secret()) -langfuse = Langfuse( - public_key=secrets["LANGFUSE-PUBLIC-KEY"], - secret_key=secrets["LANGFUSE-SECRET-KEY"], - host=os.environ["LANGFUSE_HOST"], -) - - -def get_contexto(dashboard: str) -> dict: - """ - Get contexto, filter, and items_disponiveis from DynamoDB for a given dashboard. - - Returns: - Dict with 'contexto', 'filter', and 'items_disponiveis' keys - """ - try: - table = dynamodb.Table(TABLE) - response = table.get_item(Key={"id": dashboard + "_contexto"}) - - if "Item" not in response: - return {"contexto": "", "filter": "", "items_disponiveis": {}} - - item = response["Item"] - return { - "contexto": item.get("contexto", ""), - "filter": item.get("filter_key", ""), - "items_disponiveis": item.get("itens_disponiveis", {}), - } - - except ClientError as e: - error_message = e.response["Error"]["Message"] - return {"contexto": f"Error: {error_message}", "filter": "", "items_disponiveis": {}} +#secrets = json.loads(get_secret()) +#langfuse = Langfuse( +# public_key=secrets["LANGFUSE-PUBLIC-KEY"], +# secret_key=secrets["LANGFUSE-SECRET-KEY"], +# host=os.environ["LANGFUSE_HOST"], +#) diff --git a/back/app/backend/orquestrador.py b/back/app/backend/orquestrador.py index 30080c5..7b91cda 100644 --- a/back/app/backend/orquestrador.py +++ b/back/app/backend/orquestrador.py @@ -2,17 +2,16 @@ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage from langfuse.langchain import CallbackHandler from .config import REGION -from .dynamo import langfuse, get_contexto from .agent_bedrock import create_agent -from .tools import ReportTools +from .tools import build_knowledge_base_tool -def main(user_query, history, model, base): +def main(user_query, history, model="anthropic.claude-sonnet-4-5-20250929-v1:0"): """Main execution function.""" - report_tools = [] + report_tools = [build_knowledge_base_tool()] - SYSTEM_PROMPT = """""" + SYSTEM_PROMPT = """Você é um assistente de matrículas para o campus capivari do instituo federal de são paulo, tem acesso a uma tool que acessa uma knowledge base com informações sobre tanto a matricula dos alunos do técnico quanto superior do procedimento iterno, não responda perguntas sobre o meio de ingresso SISU.""" langfuse_handler = CallbackHandler() agent = create_agent(model, REGION, tools=report_tools) @@ -24,7 +23,7 @@ def main(user_query, history, model, base): "current_step": "init", } - config = {"callbacks": [langfuse_handler], "tags": [base]} + config = {"callbacks": [langfuse_handler]} final_state = agent.invoke(initial_state, config=config) total_input_tokens = 0 @@ -33,8 +32,6 @@ def main(user_query, history, model, base): if isinstance(msg, AIMessage) and hasattr(msg, "usage_metadata") and msg.usage_metadata: total_input_tokens += msg.usage_metadata.get("input_tokens", 0) total_output_tokens += msg.usage_metadata.get("output_tokens", 0) - - langfuse.flush() return { "response": final_state["messages"][-1].content, "input_tokens": total_input_tokens, diff --git a/back/app/backend/tools.py b/back/app/backend/tools.py index cfc063a..1192828 100644 --- a/back/app/backend/tools.py +++ b/back/app/backend/tools.py @@ -1,85 +1,21 @@ -from botocore.exceptions import ClientError -from langchain_core.tools import StructuredTool +from langchain_aws import AmazonKnowledgeBasesRetriever +from langchain_core.tools.retriever import create_retriever_tool -from .config import TABLE -from .dynamo import dynamodb +from .config import REGION, KNOWLEDGE_BASE_ID -class ReportTools: - def __init__(self, id_mapping: dict[str, str]): - self.id_mapping = id_mapping - - def get_variable_value(self, id: str, variable: str) -> str: - """ - Get a specific variable's value from DynamoDB for a specific id. - - Args: - id: The id of the data - variable: The variable/column name to retrieve from the table - - Returns: - The content of the specified variable for the given id - """ - real_id = self.id_mapping.get(id, id) - try: - table = dynamodb.Table(TABLE) - response = table.get_item(Key={"id": real_id}) - - if "Item" not in response: - return f"No report found for month: {id}" - - item = response["Item"] - content = item.get(variable, "") - - if not content: - return f"Variable '{variable}' not found for month: {id}" - - return f"<{id}>\n{content}\n" - - except ClientError as e: - error_message = e.response["Error"]["Message"] - return f"Error fetching report: {error_message}" - - def get_variables_list(self, id: str) -> str: - """ - Get the list of variables available in the table for a specific month. - - Args: - id: The id of the data - - Returns: - The list of available variables/keys for the specified data - """ - real_id = self.id_mapping.get(id, id) - try: - table = dynamodb.Table(TABLE) - response = table.get_item(Key={"id": real_id}) - - if "Item" not in response: - return f"No data found for month: {id}" - - item = response["Item"] - chaves_consolidadas = item.get("chaves_consolidadas", "") - - if not chaves_consolidadas: - return f"No consolidated keys found for id: {id}" - - return chaves_consolidadas - - except ClientError as e: - error_message = e.response["Error"]["Message"] - return f"Error fetching consolidated keys: {error_message}" - - def as_tools(self) -> list: - return [ - StructuredTool.from_function( - self.get_variable_value, - name="get_variable_value", - description="Get a specific variable's data from DynamoDB for a specific id.", - ), - StructuredTool.from_function( - self.get_variables_list, - name="get_variable_list", - description="Get the list of variables available in the table for a specific id.", - ), - ] +def build_knowledge_base_tool(): + retriever = AmazonKnowledgeBasesRetriever( + knowledge_base_id=KNOWLEDGE_BASE_ID, + retrieval_config={"vectorSearchConfiguration": {"numberOfResults": 5}}, + region_name=REGION, + ) + return create_retriever_tool( + retriever, + name="consultar_base_conhecimento", + description=( + "Consulta a base de conhecimento com informações sobre procedimentos internos de matrícula " + "para cursos técnicos e superiores do campus Capivari do IFSP. " + "Use esta ferramenta para responder dúvidas sobre matrícula." + ), + ) diff --git a/back/app/front.py b/back/app/front.py index e03b8c2..597dd13 100644 --- a/back/app/front.py +++ b/back/app/front.py @@ -41,7 +41,7 @@ if prompt := st.chat_input("Type your message here..."): # Simulate streaming response (replace with actual API call) - result = orquestrador.main(prompt,str(st.session_state.messages),selected_value,base) + result = orquestrador.main(prompt,str(st.session_state.messages)) full_response = result["response"] # Simulate typing effect diff --git a/back/requirements.txt b/back/requirements.txt index 162c2a8..bb527cc 100644 --- a/back/requirements.txt +++ b/back/requirements.txt @@ -1,8 +1,8 @@ boto3==1.42.10 -langchain-aws==1.1.0 -langgraph==1.0.5 +langchain-aws==1.3.0 +langgraph==1.0.9 langchain==1.2.0 -streamlit==1.52.2 -langfuse==3.11.2 -fastapi==0.129.0 +streamlit==1.54.0 +langfuse==3.14.5 +fastapi==0.133.0 uvicorn==0.41.0 diff --git a/infra/ecs_alb/Pulumi.ifsp-assistente-matricula-ecs-alb.yaml b/infra/ecs_alb/Pulumi.ifsp-assistente-matricula-ecs-alb.yaml index a7c8c6d..f70a480 100644 --- a/infra/ecs_alb/Pulumi.ifsp-assistente-matricula-ecs-alb.yaml +++ b/infra/ecs_alb/Pulumi.ifsp-assistente-matricula-ecs-alb.yaml @@ -1,32 +1,32 @@ config: aws:region: us-east-1 app-ecs:account_id: "305427701314" # dnxbrasil-nonprod - app-ecs:project_name: assistente-analitico + app-ecs:project_name: assistente-matricula app-ecs:environment: dev # app-ecs:bedrock_api_key: # secure: you-can-put-your-pulumi-encrypted-secure-string-here app-ecs:tags: - project: assistente-analitico-db-dev + project: assistente-matricula-dev env: dev # dev, test, stage, prod account: nonprod # prod, nonprod, dataScience costCenter: AI # AWSGeneral, AI, data, productName owner: AI # team or a preson responsible app-ecs:network: - vpc_id: vpc-17ceb96c + vpc_id: vpc-08b91683af59eab85 alb_internal: false alb_subnet_ids: # 2+ private subnets if alb_internal else public subnets in the same region and vpc - - subnet-0de9f056635629827 - - subnet-09cda74f27c543521 + - subnet-07886057270d8c0f3 + - subnet-0b40078e0a65d2a0c alb_allow_ingress_cidr: - 3.14.44.224/32 ecs_subnet_ids: - - subnet-0f50f25a2fbb054d4 - - subnet-043a427630309c2f4 + - subnet-07ccfbdb115f03d5e + - subnet-0e05d1de8407fb798 app-ecs:ecs: - - task_name: assisnte-analitico-db-dev - ecr_repo_name: assistente-analitico-db-dev + - task_name: assistente-matricula-dev + ecr_repo_name: assistente-matricula-dev ecr_image_tag: latest - ecr_image_digest: sha256:0bd3a927df4367ba29dbd173e0414d884e973c37599a3f6241341e8d190e827b + ecr_image_digest: sha256:a4ff97f1bbc3ba14d40b32e68f6fc431dd5f05c70aff2f8b914f503e62bc7230 cpu: 256 memory: 512 desired_count: 1 @@ -46,14 +46,14 @@ config: target_port: 8000 container_port: 8000 env_variables: - LANGFUSE_HOST: http://172.31.252.176:3000 - TABLE: poc_dnx_monthly_summary + LANGFUSE_HOST: http://0.0.0.0/0 REGION: us-east-1 AWS_ACCOUNT: "305427701314" - SECRET_NAME: assistente-db-secrets-manager + KNOWLEDGE_BASE_ID: LBH9H6QTAK + #SECRET_NAME: assistente-db-secrets-manager # SECRET_NAME: dev/ai-pge-doc-classification # BEDROCK_REGION: us-east-1 # LANGCHAIN_TRACING_V2: "true" # LANGCHAIN_PROJECT: pge-doc-classification-dev app-ecs:cloudwatch: - log_group_name: assistente-analitico-db-dev \ No newline at end of file + log_group_name: assistente-matricula-dev \ No newline at end of file diff --git a/infra/s3/Pulumi.assistente-matricula.yaml b/infra/s3/Pulumi.assistente-matricula.yaml deleted file mode 100644 index 412ddcb..0000000 --- a/infra/s3/Pulumi.assistente-matricula.yaml +++ /dev/null @@ -1,9 +0,0 @@ -config: - aws:region: us-east-1 - ia-bucket: - buckets: - - name: assistente-matricula-docs - - name: assistente-matricula-transcribe - tags: - Enviroment: dev - Owner: ai \ No newline at end of file diff --git a/infra/s3/Pulumi.ifsp-assistente-matricula.yaml b/infra/s3/Pulumi.ifsp-assistente-matricula.yaml new file mode 100644 index 0000000..612e584 --- /dev/null +++ b/infra/s3/Pulumi.ifsp-assistente-matricula.yaml @@ -0,0 +1,21 @@ +config: + aws:region: us-east-1 + ia-bucket:buckets: + - name: br-edu-ifsp-capivari-s3-docs-matricula-dev + - name: br-edu-ifsp-capivari-s3-transcribe-matricula-dev + ia-bucket:tags: + Nome: Bucket Knowledge Base + Ambiente: dev + projeto: Assistente matricula + responsavel: infra@ifsp.edu.br + centro-de-custo: ti-geral-2026 + criticidade: alta + data-de-criacao: 13/03/2026 + backup: não + servico: Armazenamento + setor: ti + campus: capivari + area: infraestrutura + prospeccao: sim + poc: sim + \ No newline at end of file diff --git a/infra/s3/main.py b/infra/s3/__main__.py similarity index 84% rename from infra/s3/main.py rename to infra/s3/__main__.py index 3748964..4c07f35 100644 --- a/infra/s3/main.py +++ b/infra/s3/__main__.py @@ -4,6 +4,6 @@ config=pulumi.Config() buckets_config=config.require_object("buckets") tags=config.require_object("tags") for b in buckets_config: - bucket=aws.s3.BucketV2(b["name"],tags=tags) + bucket=aws.s3.Bucket(b["name"],tags=tags) pulumi.export("bucket_name",bucket.id) pulumi.export("bucket_arn",bucket.arn) \ No newline at end of file