Feat: Pulumi S3

This commit is contained in:
2026-03-16 10:59:51 -03:00
parent 624f5dc7e6
commit a4d9583821
11 changed files with 74 additions and 157 deletions

2
.gitignore vendored
View File

@@ -5,4 +5,4 @@ venv/
.env .env
.env.* .env.*
pyproject.toml pyproject.toml
Makefile

View File

@@ -1,6 +1,5 @@
import os import os
TABLE = os.environ["TABLE"]
REGION = os.environ["REGION"] REGION = os.environ["REGION"]
AWS_ACCOUNT = os.environ["AWS_ACCOUNT"] AWS_ACCOUNT = os.environ["AWS_ACCOUNT"]
SECRET_NAME = os.environ["SECRET_NAME"] KNOWLEDGE_BASE_ID = os.environ["KNOWLEDGE_BASE_ID"]

View File

@@ -4,9 +4,8 @@ import os
from botocore.exceptions import ClientError from botocore.exceptions import ClientError
from langfuse import Langfuse from langfuse import Langfuse
from .config import REGION, TABLE, SECRET_NAME from .config import REGION, SECRET_NAME
dynamodb = boto3.resource("dynamodb", region_name=REGION)
def get_secret() -> str: def get_secret() -> str:
@@ -19,35 +18,9 @@ def get_secret() -> str:
return response["SecretString"] return response["SecretString"]
secrets = json.loads(get_secret()) #secrets = json.loads(get_secret())
langfuse = Langfuse( #langfuse = Langfuse(
public_key=secrets["LANGFUSE-PUBLIC-KEY"], # public_key=secrets["LANGFUSE-PUBLIC-KEY"],
secret_key=secrets["LANGFUSE-SECRET-KEY"], # secret_key=secrets["LANGFUSE-SECRET-KEY"],
host=os.environ["LANGFUSE_HOST"], # host=os.environ["LANGFUSE_HOST"],
) #)
def get_contexto(dashboard: str) -> dict:
    """Look up the context record stored in DynamoDB for a dashboard.

    The item is read from the table named by ``TABLE`` under the id
    ``<dashboard>_contexto``.

    Args:
        dashboard: Dashboard name; used as the prefix of the item id.

    Returns:
        A dict with the keys ``contexto``, ``filter`` and
        ``items_disponiveis``. When no item exists the values are empty.
        When DynamoDB raises a ``ClientError`` the service message is
        returned in ``contexto`` prefixed with ``"Error: "``.
    """
    try:
        lookup = dynamodb.Table(TABLE).get_item(Key={"id": dashboard + "_contexto"})
        if "Item" in lookup:
            record = lookup["Item"]
            # Stored attribute names differ from the returned keys
            # (filter_key -> filter, itens_disponiveis -> items_disponiveis).
            return {
                "contexto": record.get("contexto", ""),
                "filter": record.get("filter_key", ""),
                "items_disponiveis": record.get("itens_disponiveis", {}),
            }
        # Nothing stored for this dashboard: hand back empty values.
        return {"contexto": "", "filter": "", "items_disponiveis": {}}
    except ClientError as err:
        # The service error text is surfaced through the 'contexto' field.
        detail = err.response["Error"]["Message"]
        return {"contexto": f"Error: {detail}", "filter": "", "items_disponiveis": {}}

View File

@@ -2,17 +2,16 @@ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langfuse.langchain import CallbackHandler from langfuse.langchain import CallbackHandler
from .config import REGION from .config import REGION
from .dynamo import langfuse, get_contexto
from .agent_bedrock import create_agent from .agent_bedrock import create_agent
from .tools import ReportTools from .tools import build_knowledge_base_tool
def main(user_query, history, model, base): def main(user_query, history, model="anthropic.claude-sonnet-4-5-20250929-v1:0"):
"""Main execution function.""" """Main execution function."""
report_tools = [] report_tools = [build_knowledge_base_tool()]
SYSTEM_PROMPT = """""" SYSTEM_PROMPT = """Você é um assistente de matrículas para o campus capivari do instituo federal de são paulo, tem acesso a uma tool que acessa uma knowledge base com informações sobre tanto a matricula dos alunos do técnico quanto superior do procedimento iterno, não responda perguntas sobre o meio de ingresso SISU."""
langfuse_handler = CallbackHandler() langfuse_handler = CallbackHandler()
agent = create_agent(model, REGION, tools=report_tools) agent = create_agent(model, REGION, tools=report_tools)
@@ -24,7 +23,7 @@ def main(user_query, history, model, base):
"current_step": "init", "current_step": "init",
} }
config = {"callbacks": [langfuse_handler], "tags": [base]} config = {"callbacks": [langfuse_handler]}
final_state = agent.invoke(initial_state, config=config) final_state = agent.invoke(initial_state, config=config)
total_input_tokens = 0 total_input_tokens = 0
@@ -33,8 +32,6 @@ def main(user_query, history, model, base):
if isinstance(msg, AIMessage) and hasattr(msg, "usage_metadata") and msg.usage_metadata: if isinstance(msg, AIMessage) and hasattr(msg, "usage_metadata") and msg.usage_metadata:
total_input_tokens += msg.usage_metadata.get("input_tokens", 0) total_input_tokens += msg.usage_metadata.get("input_tokens", 0)
total_output_tokens += msg.usage_metadata.get("output_tokens", 0) total_output_tokens += msg.usage_metadata.get("output_tokens", 0)
langfuse.flush()
return { return {
"response": final_state["messages"][-1].content, "response": final_state["messages"][-1].content,
"input_tokens": total_input_tokens, "input_tokens": total_input_tokens,

View File

@@ -1,85 +1,21 @@
from botocore.exceptions import ClientError from langchain_aws import AmazonKnowledgeBasesRetriever
from langchain_core.tools import StructuredTool from langchain_core.tools.retriever import create_retriever_tool
from .config import TABLE from .config import REGION, KNOWLEDGE_BASE_ID
from .dynamo import dynamodb
class ReportTools: def build_knowledge_base_tool():
def __init__(self, id_mapping: dict[str, str]): retriever = AmazonKnowledgeBasesRetriever(
self.id_mapping = id_mapping knowledge_base_id=KNOWLEDGE_BASE_ID,
retrieval_config={"vectorSearchConfiguration": {"numberOfResults": 5}},
def get_variable_value(self, id: str, variable: str) -> str: region_name=REGION,
""" )
Get a specific variable's value from DynamoDB for a specific id. return create_retriever_tool(
retriever,
Args: name="consultar_base_conhecimento",
id: The id of the data description=(
variable: The variable/column name to retrieve from the table "Consulta a base de conhecimento com informações sobre procedimentos internos de matrícula "
"para cursos técnicos e superiores do campus Capivari do IFSP. "
Returns: "Use esta ferramenta para responder dúvidas sobre matrícula."
The content of the specified variable for the given id ),
""" )
real_id = self.id_mapping.get(id, id)
try:
table = dynamodb.Table(TABLE)
response = table.get_item(Key={"id": real_id})
if "Item" not in response:
return f"No report found for month: {id}"
item = response["Item"]
content = item.get(variable, "")
if not content:
return f"Variable '{variable}' not found for month: {id}"
return f"<{id}>\n{content}\n</{id}>"
except ClientError as e:
error_message = e.response["Error"]["Message"]
return f"Error fetching report: {error_message}"
def get_variables_list(self, id: str) -> str:
    """Return the consolidated list of variables stored for an id.

    The given id is first translated through ``self.id_mapping`` (falling
    back to the id itself when it has no mapping), then the matching item
    is read from the DynamoDB table named by ``TABLE``.

    Args:
        id: Identifier of the data as supplied by the caller.

    Returns:
        The item's ``chaves_consolidadas`` attribute, or a human-readable
        message when the item is missing, the attribute is empty, or
        DynamoDB raises a ``ClientError``.
    """
    lookup_id = self.id_mapping.get(id, id)
    try:
        result = dynamodb.Table(TABLE).get_item(Key={"id": lookup_id})
        if "Item" in result:
            consolidated = result["Item"].get("chaves_consolidadas", "")
            if consolidated:
                return consolidated
            return f"No consolidated keys found for id: {id}"
        # Messages quote the caller-supplied id, not the mapped one.
        return f"No data found for month: {id}"
    except ClientError as err:
        detail = err.response["Error"]["Message"]
        return f"Error fetching consolidated keys: {detail}"
def as_tools(self) -> list:
    """Expose the lookup methods as ``StructuredTool`` instances.

    Returns:
        A two-element list: the tool wrapping ``get_variable_value``
        followed by the tool wrapping ``get_variables_list``.
    """
    value_tool = StructuredTool.from_function(
        self.get_variable_value,
        name="get_variable_value",
        description="Get a specific variable's data from DynamoDB for a specific id.",
    )
    # NOTE: the registered tool name is singular ("get_variable_list")
    # while the wrapped method is get_variables_list.
    listing_tool = StructuredTool.from_function(
        self.get_variables_list,
        name="get_variable_list",
        description="Get the list of variables available in the table for a specific id.",
    )
    return [value_tool, listing_tool]

View File

@@ -41,7 +41,7 @@ if prompt := st.chat_input("Type your message here..."):
# Simulate streaming response (replace with actual API call) # Simulate streaming response (replace with actual API call)
result = orquestrador.main(prompt,str(st.session_state.messages),selected_value,base) result = orquestrador.main(prompt,str(st.session_state.messages))
full_response = result["response"] full_response = result["response"]
# Simulate typing effect # Simulate typing effect

View File

@@ -1,8 +1,8 @@
boto3==1.42.10 boto3==1.42.10
langchain-aws==1.1.0 langchain-aws==1.3.0
langgraph==1.0.5 langgraph==1.0.9
langchain==1.2.0 langchain==1.2.0
streamlit==1.52.2 streamlit==1.54.0
langfuse==3.11.2 langfuse==3.14.5
fastapi==0.129.0 fastapi==0.133.0
uvicorn==0.41.0 uvicorn==0.41.0

View File

@@ -1,32 +1,32 @@
config: config:
aws:region: us-east-1 aws:region: us-east-1
app-ecs:account_id: "305427701314" # dnxbrasil-nonprod app-ecs:account_id: "305427701314" # dnxbrasil-nonprod
app-ecs:project_name: assistente-analitico app-ecs:project_name: assistente-matricula
app-ecs:environment: dev app-ecs:environment: dev
# app-ecs:bedrock_api_key: # app-ecs:bedrock_api_key:
# secure: you-can-put-your-pulumi-encrypted-secure-string-here # secure: you-can-put-your-pulumi-encrypted-secure-string-here
app-ecs:tags: app-ecs:tags:
project: assistente-analitico-db-dev project: assistente-matricula-dev
env: dev # dev, test, stage, prod env: dev # dev, test, stage, prod
account: nonprod # prod, nonprod, dataScience account: nonprod # prod, nonprod, dataScience
costCenter: AI # AWSGeneral, AI, data, productName costCenter: AI # AWSGeneral, AI, data, productName
owner: AI # team or a person responsible owner: AI # team or a person responsible
app-ecs:network: app-ecs:network:
vpc_id: vpc-17ceb96c vpc_id: vpc-08b91683af59eab85
alb_internal: false alb_internal: false
alb_subnet_ids: # 2+ private subnets if alb_internal else public subnets in the same region and vpc alb_subnet_ids: # 2+ private subnets if alb_internal else public subnets in the same region and vpc
- subnet-0de9f056635629827 - subnet-07886057270d8c0f3
- subnet-09cda74f27c543521 - subnet-0b40078e0a65d2a0c
alb_allow_ingress_cidr: alb_allow_ingress_cidr:
- 3.14.44.224/32 - 3.14.44.224/32
ecs_subnet_ids: ecs_subnet_ids:
- subnet-0f50f25a2fbb054d4 - subnet-07ccfbdb115f03d5e
- subnet-043a427630309c2f4 - subnet-0e05d1de8407fb798
app-ecs:ecs: app-ecs:ecs:
- task_name: assisnte-analitico-db-dev - task_name: assistente-matricula-dev
ecr_repo_name: assistente-analitico-db-dev ecr_repo_name: assistente-matricula-dev
ecr_image_tag: latest ecr_image_tag: latest
ecr_image_digest: sha256:0bd3a927df4367ba29dbd173e0414d884e973c37599a3f6241341e8d190e827b ecr_image_digest: sha256:a4ff97f1bbc3ba14d40b32e68f6fc431dd5f05c70aff2f8b914f503e62bc7230
cpu: 256 cpu: 256
memory: 512 memory: 512
desired_count: 1 desired_count: 1
@@ -46,14 +46,14 @@ config:
target_port: 8000 target_port: 8000
container_port: 8000 container_port: 8000
env_variables: env_variables:
LANGFUSE_HOST: http://172.31.252.176:3000 LANGFUSE_HOST: http://0.0.0.0/0
TABLE: poc_dnx_monthly_summary
REGION: us-east-1 REGION: us-east-1
AWS_ACCOUNT: "305427701314" AWS_ACCOUNT: "305427701314"
SECRET_NAME: assistente-db-secrets-manager KNOWLEDGE_BASE_ID: LBH9H6QTAK
#SECRET_NAME: assistente-db-secrets-manager
# SECRET_NAME: dev/ai-pge-doc-classification # SECRET_NAME: dev/ai-pge-doc-classification
# BEDROCK_REGION: us-east-1 # BEDROCK_REGION: us-east-1
# LANGCHAIN_TRACING_V2: "true" # LANGCHAIN_TRACING_V2: "true"
# LANGCHAIN_PROJECT: pge-doc-classification-dev # LANGCHAIN_PROJECT: pge-doc-classification-dev
app-ecs:cloudwatch: app-ecs:cloudwatch:
log_group_name: assistente-analitico-db-dev log_group_name: assistente-matricula-dev

View File

@@ -1,9 +0,0 @@
config:
aws:region: us-east-1
ia-bucket:
buckets:
- name: assistente-matricula-docs
- name: assistente-matricula-transcribe
tags:
Enviroment: dev
Owner: ai

View File

@@ -0,0 +1,21 @@
config:
aws:region: us-east-1
ia-bucket:buckets:
- name: br-edu-ifsp-capivari-s3-docs-matricula-dev
- name: br-edu-ifsp-capivari-s3-transcribe-matricula-dev
ia-bucket:tags:
Nome: Bucket Knowledge Base
Ambiente: dev
projeto: Assistente matricula
responsavel: infra@ifsp.edu.br
centro-de-custo: ti-geral-2026
criticidade: alta
data-de-criacao: 13/03/2026
backup: não
servico: Armazenamento
setor: ti
campus: capivari
area: infraestrutura
prospeccao: sim
poc: sim

View File

@@ -4,6 +4,6 @@ config=pulumi.Config()
buckets_config=config.require_object("buckets") buckets_config=config.require_object("buckets")
tags=config.require_object("tags") tags=config.require_object("tags")
for b in buckets_config: for b in buckets_config:
bucket=aws.s3.BucketV2(b["name"],tags=tags) bucket=aws.s3.Bucket(b["name"],tags=tags)
pulumi.export("bucket_name",bucket.id) pulumi.export("bucket_name",bucket.id)
pulumi.export("bucket_arn",bucket.arn) pulumi.export("bucket_arn",bucket.arn)