Adds starting files
This commit is contained in:
342
code/app/backend/BDAgent.py
Normal file
342
code/app/backend/BDAgent.py
Normal file
@@ -0,0 +1,342 @@
|
||||
"""
|
||||
LangGraph Agent using AWS Bedrock Cross-Region Inference Profile with Tools
|
||||
|
||||
This script creates a LangGraph agent that answers questions using an
AWS Bedrock inference profile; tool support is wired in, but no tools
are currently registered.
|
||||
"""
|
||||
|
||||
import boto3
|
||||
from typing import TypedDict, Annotated
|
||||
from langgraph.graph import StateGraph, END
|
||||
from langchain_aws import ChatBedrockConverse
|
||||
from langchain_core.tools import tool
|
||||
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, SystemMessage
|
||||
import operator
|
||||
import json
|
||||
import time
|
||||
from langfuse import Langfuse
|
||||
from langfuse.langchain import CallbackHandler
|
||||
from botocore.exceptions import ClientError
|
||||
import os
|
||||
from backend.utils import dynamodb_read_table as drt
|
||||
# Athena workgroup and Glue database used by exec_athena_query.
WORKGROUP = "iceberg-workgroup"
DATABASE = "dnx_warehouse"
|
||||
def get_secret():
    """Fetch the application secrets payload from AWS Secrets Manager.

    Returns:
        str: Raw ``SecretString`` (JSON text) of the
        "assistente-db-secrets-manager" secret in us-east-1.

    Raises:
        botocore.exceptions.ClientError: If the secret cannot be read.
    """
    secret_name = "assistente-db-secrets-manager"
    region_name = "us-east-1"

    # Create a Secrets Manager client
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name
    )

    try:
        get_secret_value_response = client.get_secret_value(
            SecretId=secret_name
        )
    except ClientError:
        # For the list of exceptions thrown, see
        # https://docs.aws.amazon.com/secretsmanager/latest/apireference/API_GetSecretValue.html
        # Bare `raise` re-raises with the original traceback intact.
        raise

    secret = get_secret_value_response['SecretString']
    return secret
|
||||
# Load secrets once at import time; the module fails fast if AWS
# credentials or the secret itself are unavailable.
secrets=json.loads(get_secret())
# Langfuse tracing client; the host comes from the environment.
langfuse = Langfuse(
    public_key=secrets['LANGFUSE-PUBLIC-KEY'],
    secret_key=secrets['LANGFUSE-SECRET-KEY'],
    host=os.environ["LANGFUSE_HOST"]
)
# Shared boto3 session and Athena client used by exec_athena_query.
session = boto3.Session()
athena = session.client("athena", region_name="us-east-1")
|
||||
|
||||
# ==============================================
|
||||
# QUERY
|
||||
# ==============================================
|
||||
|
||||
def exec_athena_query(query):
    """Run a SQL query on Athena and return its result rows.

    Starts the query in the configured workgroup/database, polls once per
    second until it reaches a terminal state, then fetches the results.

    Args:
        query: SQL string to execute against the ``DATABASE`` database.

    Returns:
        list | None: Rows from the first page of results (header row
        included) on success; None when the query fails or is cancelled.
    """
    print("Executando query no Athena...")
    response = athena.start_query_execution(
        QueryString=query,
        QueryExecutionContext={"Database": DATABASE},
        WorkGroup=WORKGROUP
    )

    query_execution_id = response["QueryExecutionId"]
    print(f"QueryExecutionId: {query_execution_id}")

    # ==============================================
    # AGUARDAR RESULTADO (poll until terminal state)
    # ==============================================
    while True:
        result = athena.get_query_execution(QueryExecutionId=query_execution_id)
        state = result["QueryExecution"]["Status"]["State"]

        if state in ["SUCCEEDED", "FAILED", "CANCELLED"]:
            print("Estado final:", state)
            break

        print("Aguardando execução...")
        time.sleep(1)

    if state == "SUCCEEDED":
        # NOTE(review): only the first page of results is returned;
        # paginate with NextToken if queries can exceed one page.
        output = athena.get_query_results(QueryExecutionId=query_execution_id)
        print(f"\n🔧 [TOOL CALLED] consult answer")
        return output["ResultSet"]["Rows"]

    # Failure/cancellation: keep the original best-effort behavior, but
    # make the None result explicit for readers and callers.
    print("Erro ao executar a query.")
    return None
|
||||
# Define tools here when the agent needs them (none registered yet).


# Define the agent state
class AgentState(TypedDict):
    # Conversation messages; operator.add makes LangGraph append node
    # updates to this list instead of overwriting it.
    messages: Annotated[list, operator.add]
    # Marker for the most recently executed step (debug/trace aid).
    current_step: str
|
||||
|
||||
|
||||
# Initialize Bedrock client with inference profile
|
||||
def create_bedrock_llm(model_id: str, region: str = "us-east-1"):
    """Create a ChatBedrockConverse LLM for a supported Bedrock model.

    The bare model ID is mapped to its cross-region inference profile by
    prepending a routing prefix ("global" or "us") and paired with the
    provider name required by the Converse API.

    Args:
        model_id: Bedrock model ID (e.g. "anthropic.claude-haiku-4-5-20251001-v1:0").
            Must be one of the keys below; an unknown ID raises KeyError.
        region: AWS region (default: "us-east-1").

    Returns:
        ChatBedrockConverse instance with tools bound (currently none).
    """
    # Provider name for each supported model.
    PROVIDER = {
        "anthropic.claude-haiku-4-5-20251001-v1:0": "anthropic",
        "anthropic.claude-sonnet-4-5-20250929-v1:0": "anthropic",
        "meta.llama4-maverick-17b-instruct-v1:0": "meta",
        "meta.llama4-scout-17b-instruct-v1:0": "meta",
        "amazon.nova-lite-v1:0": "amazon",
        "amazon.nova-pro-v1:0": "amazon",
        "amazon.nova-2-lite-v1:0": "amazon"
    }
    # Cross-region routing prefix prepended to the model ID.
    PREFIX = {
        "anthropic.claude-haiku-4-5-20251001-v1:0": "global",
        "anthropic.claude-sonnet-4-5-20250929-v1:0": "global",
        "meta.llama4-maverick-17b-instruct-v1:0": "us",
        "meta.llama4-scout-17b-instruct-v1:0": "us",
        "amazon.nova-lite-v1:0": "us",
        "amazon.nova-pro-v1:0": "us",
        "amazon.nova-2-lite-v1:0": "global"
    }
    # (An unused MODEL_ARNS table with hard-coded account-specific ARNs
    # was removed here; the prefixed model ID is what is actually used.)
    llm = ChatBedrockConverse(
        model_id=PREFIX[model_id] + "." + model_id,
        region_name=region,
        provider=PROVIDER[model_id],
        max_tokens=2048,
        temperature=0.7
    )

    # Bind tools to the LLM.
    # TODO: register tools (e.g. Athena query helpers) when ready.
    tools = []
    llm_with_tools = llm.bind_tools(tools)

    return llm_with_tools
|
||||
|
||||
|
||||
# Define agent nodes
|
||||
def call_model(state: AgentState, llm) -> AgentState:
    """Invoke the LLM on the accumulated messages.

    The call is traced through Langfuse via a callback handler.

    Args:
        state: Current agent state; only ``messages`` is read.
        llm: The tool-bound LLM produced by create_bedrock_llm().

    Returns:
        Partial state update with the model's response appended and the
        step marker set.
    """
    print(f"[MODEL] Calling Bedrock inference profile...")

    messages = state["messages"]
    langfuse_handler = CallbackHandler()
    config = {"configurable": {"thread_id": "abc123"}, "callbacks": [langfuse_handler]}
    response = llm.invoke(messages, config=config)

    # Return current_step in the update dict: LangGraph only merges the
    # returned mapping, so mutating `state` in place would be discarded.
    return {"messages": [response], "current_step": "model_called"}
|
||||
|
||||
|
||||
def call_tools(state: AgentState) -> AgentState:
    """Execute any tool calls requested by the last AI message.

    Each requested tool is looked up in the local registry; an unknown
    tool produces an error ToolMessage (so the model can recover) rather
    than crashing the graph with a KeyError — important because the
    registry is currently empty.

    Returns:
        Partial state update: the new tool messages and the step marker.
    """
    print(f"[TOOLS] Checking for tool calls...")

    messages = state["messages"]
    last_message = messages[-1]

    # Check if there are tool calls
    if hasattr(last_message, 'tool_calls') and last_message.tool_calls:
        print(f"[TOOLS] Found {len(last_message.tool_calls)} tool call(s)")

        # Registry of available tools (currently none registered).
        tools_map = {
        }

        tool_messages = []
        # Execute each tool call
        for tool_call in last_message.tool_calls:
            tool_name = tool_call["name"]
            tool_args = tool_call["args"]

            print(f"[TOOLS] Executing: {tool_name}")

            tool_func = tools_map.get(tool_name)
            if tool_func is None:
                # Unknown tool: report back to the model instead of
                # raising KeyError and killing the run.
                result = f"Error: tool '{tool_name}' is not available."
            else:
                result = tool_func.invoke(tool_args)

            # Create tool message tied to the originating call id.
            tool_message = ToolMessage(
                content=str(result),
                tool_call_id=tool_call["id"]
            )
            tool_messages.append(tool_message)

        # Return current_step in the update dict: LangGraph only merges
        # the returned mapping, not in-place mutations of `state`.
        return {"messages": tool_messages, "current_step": "tools_executed"}
    else:
        print(f"[TOOLS] No tool calls found")
        return {"messages": [], "current_step": "no_tools"}
|
||||
|
||||
|
||||
def should_continue(state: AgentState) -> str:
|
||||
"""Determine if we should continue to tools or end."""
|
||||
messages = state["messages"]
|
||||
last_message = messages[-1]
|
||||
|
||||
# If there are tool calls, continue to tools node
|
||||
if hasattr(last_message, 'tool_calls') and last_message.tool_calls:
|
||||
print("[ROUTER] Routing to tools...")
|
||||
return "tools"
|
||||
|
||||
# Otherwise, end
|
||||
print("[ROUTER] No more tool calls, ending...")
|
||||
return "end"
|
||||
|
||||
|
||||
# Build the LangGraph agent
|
||||
def create_agent(inference_profile_arn: str, region: str = "us-east-1"):
    """
    Create a LangGraph agent that uses Bedrock inference profile with tools.

    Graph shape: model -> (tools -> model)* -> END, where should_continue
    routes to "tools" only while the model keeps requesting tool calls.

    Args:
        inference_profile_arn: NOTE(review): despite the name, this is
            passed straight to create_bedrock_llm, which expects a bare
            Bedrock model ID, not an ARN — confirm with callers.
        region: AWS region

    Returns:
        Compiled LangGraph workflow
    """
    # Initialize the LLM with tools
    llm = create_bedrock_llm(inference_profile_arn, region)

    # Create the graph
    workflow = StateGraph(AgentState)

    # Add nodes; the lambda closes over `llm` so call_model can use it.
    workflow.add_node("model", lambda state: call_model(state, llm))
    workflow.add_node("tools", call_tools)

    # Define the workflow
    workflow.set_entry_point("model")

    # Add conditional edges
    workflow.add_conditional_edges(
        "model",
        should_continue,
        {
            "tools": "tools",
            "end": END
        }
    )

    # After tools, go back to model
    workflow.add_edge("tools", "model")

    # Compile the graph
    app = workflow.compile()

    return app
|
||||
|
||||
|
||||
def main(user_query, history, model):
    """Run the report-analysis agent for a single user query.

    Builds a system prompt from company context, the monthly reports
    stored in DynamoDB, and the chat history; runs the LangGraph agent
    and prints a trace of the conversation.

    Args:
        user_query: The user's question.
        history: Prior chat history, already rendered as a string.
        model: Bedrock model ID forwarded to create_agent().

    Returns:
        str: Content of the agent's final message.
    """
    REGION = "us-east-1"

    # System prompt. Fixes vs. the original: a stray quadruple-quote that
    # injected a literal '"' into the prompt, malformed closing tags
    # (<\context>, <\reports>), and typos in the English instructions.
    SYSTEM_PROMPT = (
        """ You are an analytical agent, with access to monthly reports about Bacio di latte
<context>
A Bacio di Latte é uma rede de gelaterias artesanais fundada em São Paulo, Brasil, em 2011, pelos irmãos milaneses Edoardo e Luigi Tonolli, que trouxeram a tradição do gelato italiano com ingredientes de alta qualidade, resultando em um sorvete cremoso e fresco, produzido diariamente, sem gordura hidrogenada ou trans, e que se tornou popular não só no Brasil, mas também nos EUA, representando uma experiência autêntica de gelato.
</context>
<reports>
"""
        + drt.read_table_as_xml("poc_dnx_monthly_summary", "us-east-1")
        + """
</reports>
Here is the chat history:"""
        + history
        + """
Answer the user the best you can with the given information, if you don't know the answer or how to answer say so, only answer from what you know."""
    )

    print("=" * 60)
    print("LangGraph Agent with AWS Bedrock Inference Profile + Tools")
    print("=" * 60)
    print(f"\nUsing inference profile: {model}")
    print(f"Region: {REGION}\n")
    # The original banner advertised add/multiply tools that do not
    # exist; report the actual (empty) tool registry instead.
    print("Available Tools: none registered")
    print("\nSystem Prompt: Configured ✓")
    print("=" * 60)

    # Create the agent
    agent = create_agent(model, REGION)

    # Initialize state with the system prompt and the user's question.
    initial_state = {
        "messages": [
            SystemMessage(content=SYSTEM_PROMPT),
            HumanMessage(content=user_query)
        ],
        "current_step": "init"
    }

    print(f"\nUser Query: {user_query}\n")
    print("-" * 60)

    # Run the agent
    final_state = agent.invoke(initial_state)

    # Display results
    print("-" * 60)
    print("\n[FINAL RESULT]")
    print("\nConversation History:")
    for i, msg in enumerate(final_state["messages"], 1):
        if isinstance(msg, SystemMessage):
            print(f"\n{i}. System: [System prompt configured]")
        elif isinstance(msg, HumanMessage):
            print(f"\n{i}. User: {msg.content}")
        elif isinstance(msg, AIMessage):
            if hasattr(msg, 'tool_calls') and msg.tool_calls:
                print(f"\n{i}. AI: [Calling tools...]")
            else:
                print(f"\n{i}. AI: {msg.content}")
        elif isinstance(msg, ToolMessage):
            print(f"\n{i}. Tool Result: {msg.content}")

    print("\n" + "=" * 60)
    print(f"Agent completed successfully. Final step: {final_state['current_step']}")
    # Flush traces now: the process may exit right after this call.
    langfuse.flush()
    return final_state['messages'][-1].content
|
||||
if __name__ == "__main__":
    # Smoke test. main(user_query, history, model) takes exactly three
    # arguments; the original call passed four and raised TypeError.
    main("oi", "", "anthropic.claude-haiku-4-5-20251001-v1:0")
|
||||
209
code/app/backend/utils/dynamodb_read_table.py
Normal file
209
code/app/backend/utils/dynamodb_read_table.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
DynamoDB Table Reader Script
|
||||
|
||||
This script connects to AWS DynamoDB and reads all entries from a specified table.
|
||||
Outputs data in XML format, wrapping each item's content in a tag named after its period value.
|
||||
|
||||
Usage:
|
||||
from dynamodb_read_table import read_table_as_xml
|
||||
xml_content = read_table_as_xml("my-table-name")
|
||||
"""
|
||||
|
||||
import re
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
|
||||
def clean_context_xml(context: str) -> str:
    """
    Strip the XML declaration and the <relatorio> wrapper from context content.

    Args:
        context: Raw XML content from DynamoDB

    Returns:
        Cleaned XML content without declaration and relatorio tags
    """
    # Applied in order: the <?xml ...?> declaration, the opening
    # <relatorio ...> tag (with any attributes), the closing tag.
    for pattern in (r'<\?xml[^?]*\?>\s*', r'<relatorio[^>]*>\s*', r'\s*</relatorio>'):
        context = re.sub(pattern, '', context)
    return context.strip()
|
||||
|
||||
|
||||
def remove_xml_declaration(content: str) -> str:
    """
    Drop only the leading <?xml ...?> declaration, keeping all other tags
    (including <relatorio>) intact.

    Args:
        content: Raw XML content

    Returns:
        Content without XML declaration
    """
    without_decl = re.sub(r'<\?xml[^?]*\?>\s*', '', content)
    return without_decl.strip()
|
||||
|
||||
|
||||
def format_items_to_xml(items: list) -> str:
    """
    Render DynamoDB items as an XML string.

    Each item becomes a tag named after its 'period' field, containing
    the cleaned 'context' content and, when present, the
    'dados_consolidados' content.

    Args:
        items: List of DynamoDB items

    Returns:
        Complete XML formatted string with all items
    """
    lines = []

    for entry in items:
        tag = entry.get("period", "unknown")
        body = clean_context_xml(entry.get("context", ""))
        extra = remove_xml_declaration(entry.get("dados_consolidados", ""))

        section = [f"<{tag}>", body]
        if extra:
            section.append(extra)
        section.append(f"</{tag}>")
        section.append("")  # blank line separating entries

        lines.extend(section)

    return "\n".join(lines)
|
||||
|
||||
|
||||
def get_dynamodb_client(region_name: str = "us-east-1"):
    """Return a low-level DynamoDB client for the given region."""
    return boto3.Session().client("dynamodb", region_name=region_name)
|
||||
|
||||
|
||||
def get_dynamodb_resource(region_name: str = "us-east-1"):
    """Return a high-level DynamoDB resource for the given region."""
    return boto3.Session().resource("dynamodb", region_name=region_name)
|
||||
|
||||
|
||||
def scan_table(table_name: str, region_name: str = "us-east-1") -> list:
    """
    Scan a DynamoDB table and return all items.

    Paginates via LastEvaluatedKey to handle tables larger than the 1MB
    per-response limit.

    Args:
        table_name: Name of the DynamoDB table to scan
        region_name: AWS region where the table is located

    Returns:
        List of all items in the table
    """
    table = get_dynamodb_resource(region_name).Table(table_name)

    items = []
    scan_kwargs = {}

    try:
        while True:
            response = table.scan(**scan_kwargs)
            items.extend(response.get("Items", []))

            cursor = response.get("LastEvaluatedKey")
            if not cursor:
                break
            # Resume the scan where the previous page stopped.
            scan_kwargs = {"ExclusiveStartKey": cursor}

        print(f"Successfully scanned {len(items)} items from table '{table_name}'")
        return items

    except ClientError as e:
        error_code = e.response["Error"]["Code"]
        error_message = e.response["Error"]["Message"]
        print(f"Error scanning table: {error_code} - {error_message}")
        raise
|
||||
|
||||
|
||||
def list_tables(region_name: str = "us-east-1") -> list:
    """List all DynamoDB tables in the specified region, paginating
    via LastEvaluatedTableName."""
    client = get_dynamodb_client(region_name)

    names = []
    page_kwargs = {}

    try:
        while True:
            response = client.list_tables(**page_kwargs)
            names.extend(response.get("TableNames", []))

            cursor = response.get("LastEvaluatedTableName")
            if not cursor:
                return names
            # Resume listing where the previous page stopped.
            page_kwargs = {"ExclusiveStartTableName": cursor}

    except ClientError as e:
        error_code = e.response["Error"]["Code"]
        error_message = e.response["Error"]["Message"]
        print(f"Error listing tables: {error_code} - {error_message}")
        raise
|
||||
|
||||
|
||||
def get_table_info(table_name: str, region_name: str = "us-east-1") -> dict:
    """Get metadata information about a DynamoDB table."""
    client = get_dynamodb_client(region_name)

    try:
        described = client.describe_table(TableName=table_name).get("Table", {})

        # Pull the plain fields straight through; CreationDateTime is
        # stringified so the result is JSON-friendly.
        summary = {
            key: described.get(key)
            for key in (
                "TableName",
                "TableStatus",
                "ItemCount",
                "TableSizeBytes",
                "KeySchema",
                "AttributeDefinitions",
            )
        }
        summary["CreationDateTime"] = str(described.get("CreationDateTime"))
        return summary

    except ClientError as e:
        error_code = e.response["Error"]["Code"]
        error_message = e.response["Error"]["Message"]
        print(f"Error describing table: {error_code} - {error_message}")
        raise
|
||||
|
||||
|
||||
def read_table_as_xml(table_name: str, region_name: str = "us-east-1") -> str:
    """
    Read all entries from a DynamoDB table and return them as one XML string.

    Args:
        table_name: Name of the DynamoDB table to read
        region_name: AWS region where the table is located (default: us-east-1)

    Returns:
        XML formatted string; each item is wrapped in a tag named after
        its period value.
    """
    return format_items_to_xml(scan_table(table_name, region_name))
|
||||
# Manual smoke test: dump the summary table as XML to stdout.
if __name__=="__main__":
    print(read_table_as_xml("poc_dnx_monthly_summary","us-east-1"))
|
||||
Reference in New Issue
Block a user