# Source: docker/cagent-action, .github/workflows/test.yml at commit 11d0e3b14260509ba70b2efb27454ed97587ba83

# Display name of this workflow.
name: Test cagent-action

# Triggers: pull requests that target main (when opened, updated with new
# commits, or reopened) and direct pushes to main.
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
    branches:
      - main
  push:
    branches:
      - main

# Workflow-wide token scope: read-only access to repository contents.
permissions:
  contents: read

jobs:
  # Checks out the repository and runs tests/test-local.sh, the prompt
  # sanitization test script.
  test-prompt-sanitization:
    name: Prompt Sanitization Tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0

      - name: Run prompt sanitization tests
        # The script is started from inside tests/, so relative paths it uses
        # resolve against that directory.
        working-directory: tests
        run: |
          # Make the script executable before invoking it directly.
          chmod +x test-local.sh
          ./test-local.sh

  # Checks out the repository and runs tests/test-output-extraction.sh, the
  # output extraction test script.
  test-output-extraction:
    name: Output Extraction Tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0

      - name: Run output extraction tests
        # The script is started from inside tests/, so relative paths it uses
        # resolve against that directory.
        working-directory: tests
        run: |
          # Make the script executable before invoking it directly.
          chmod +x test-output-extraction.sh
          ./test-output-extraction.sh

  # Checks out the repository and runs tests/test-job-summary.sh, the job
  # summary format test script.
  test-job-summary:
    name: Job Summary Format Tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0

      - name: Run job summary tests
        # The script is started from inside tests/, so relative paths it uses
        # resolve against that directory.
        working-directory: tests
        run: |
          # Make the script executable before invoking it directly.
          chmod +x test-job-summary.sh
          ./test-job-summary.sh

  # Checks out the repository and runs tests/test-security.sh, the security
  # test script.
  test-security:
    name: Security Tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0

      - name: Run security tests
        # The script is started from inside tests/, so relative paths it uses
        # resolve against that directory.
        working-directory: tests
        run: |
          # Make the script executable before invoking it directly.
          chmod +x test-security.sh
          ./test-security.sh

  # Checks out the repository and runs tests/test-exploits.sh, the exploit
  # test script.
  test-exploits:
    name: Exploit Tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0

      - name: Run exploit tests
        # The script is started from inside tests/, so relative paths it uses
        # resolve against that directory.
        working-directory: tests
        run: |
          # Make the script executable before invoking it directly.
          chmod +x test-exploits.sh
          ./test-exploits.sh

  # End-to-end test: runs this repository's action (uses: ./) with the
  # agentcatalog/pirate agent, then validates the exit code and output file
  # the action reports. On fork pull requests every step after the fork check
  # is skipped, because repository secrets are not available there.
  test-pirate-agent:
    name: Pirate Agent Test
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Check if fork PR
        id: fork-check
        # Context values are handed to the script through env instead of being
        # interpolated into the script text, so they are always treated as
        # data and never parsed as shell syntax.
        env:
          EVENT_NAME: ${{ github.event_name }}
          # Default to an empty string to handle edge cases (deleted branches, malformed events)
          HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name || '' }}
          BASE_REPO: ${{ github.repository }}
        run: |
          # A fork PR is a pull_request event whose head repository is known
          # and differs from the repository running this workflow.
          if [[ "$EVENT_NAME" == "pull_request" && -n "$HEAD_REPO" && "$HEAD_REPO" != "$BASE_REPO" ]]; then
            echo "⏭️ Skipping - fork PR (secrets not available)"
            echo "is_fork=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_fork=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Checkout code
        if: steps.fork-check.outputs.is_fork != 'true'
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0

      - name: Run test
        if: steps.fork-check.outputs.is_fork != 'true'
        id: pirate
        uses: ./
        with:
          agent: agentcatalog/pirate
          prompt: "What do we ship today?"
          openai-api-key: ${{ secrets.OPENAI_API_KEY }}

      - name: Validate output and exit code
        if: steps.fork-check.outputs.is_fork != 'true'
        # Step outputs are passed via env so their contents cannot be
        # interpreted as part of the script.
        env:
          OUTPUT_FILE: ${{ steps.pirate.outputs.output-file }}
          EXIT_CODE: ${{ steps.pirate.outputs.exit-code }}
        run: |
          # Check that exit code is 0 (success)
          if [ "$EXIT_CODE" != "0" ]; then
            echo "❌ Agent failed with exit code: $EXIT_CODE"
            exit 1
          fi
          echo "✅ Agent completed successfully with exit code 0"

          # Check that output file exists
          if [ ! -f "$OUTPUT_FILE" ]; then
            echo "❌ Output file not found: $OUTPUT_FILE"
            exit 1
          fi
          echo "✅ Output file found: $OUTPUT_FILE"

          # Display the output for debugging
          echo "--- Agent Output ---"
          cat "$OUTPUT_FILE"
          echo "--- End Output ---"

          # Check that output is clean (no agent markers or metadata in output).
          # A leftover marker only produces a warning; it does not fail the job.
          if grep -qF -- "--- Agent: root ---" "$OUTPUT_FILE"; then
            echo "⚠️  Output still contains '--- Agent: root ---' marker (not fully cleaned)"
          fi

          # Check that output doesn't contain log metadata
          if grep -qE "^(time=|level=)" "$OUTPUT_FILE"; then
            echo "❌ Output contains log metadata (time= or level=) - cleaning failed"
            exit 1
          fi
          echo "✅ Output is clean (no log metadata)"

          # Check that there is actual content: keep up to five lines that are
          # neither empty nor whitespace-only. `|| true` keeps a no-match grep
          # from aborting the step, so the explicit check below decides.
          CONTENT=$(grep -v '^[[:space:]]*$' "$OUTPUT_FILE" | head -n 5 || true)

          if [ -z "$CONTENT" ]; then
            echo "❌ No content found in output file"
            exit 1
          fi

          echo "✅ Found agent response content"
          echo "Response preview: $(echo "$CONTENT" | head -n 1)"

  # Negative test: runs this repository's action (uses: ./) with an agent
  # reference that is expected to fail, then verifies the failure was
  # reported either through the exit code or through the output file.
  test-invalid-agent:
    name: Invalid Agent Test
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0

      - name: Test should fail on invalid agent
        id: invalid-agent
        # The failure is the expected result, so it must not fail the job;
        # the next step inspects the outputs instead.
        continue-on-error: true
        uses: ./
        with:
          agent: agentcatalog/nonexistent
          prompt: "This should fail"
          openai-api-key: ${{ secrets.OPENAI_API_KEY }}

      - name: Verify invalid agent failed
        # Step outputs are passed via env so their contents cannot be
        # interpreted as part of the script.
        env:
          OUTPUT_FILE: ${{ steps.invalid-agent.outputs.output-file }}
          EXIT_CODE: ${{ steps.invalid-agent.outputs.exit-code }}
        run: |
          # Check exit code OR check for error in output (cagent may exit 0 even on pull failure)
          # NOTE(review): an empty exit-code output (e.g. the action stopping
          # before it sets outputs) also takes the "non-zero" branch below;
          # confirm that is intended.
          if [ "$EXIT_CODE" = "0" ]; then
            # Exit code is 0, check if output contains error message
            if [ -f "$OUTPUT_FILE" ] && grep -qF "failed to pull" "$OUTPUT_FILE"; then
              echo "✅ Invalid agent correctly failed (error in output)"
            else
              echo "❌ Invalid agent should have failed but succeeded with no error"
              exit 1
            fi
          else
            echo "✅ Invalid agent correctly failed (non-zero exit code)"
          fi