-
Notifications
You must be signed in to change notification settings - Fork 7
200 lines (172 loc) · 6.15 KB
/
test.yml
File metadata and controls
200 lines (172 loc) · 6.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# Display name of this workflow.
name: Test cagent-action

# Triggers: pull requests against main (opened, updated, or reopened)
# and direct pushes to main.
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
    branches:
      - main
  push:
    branches:
      - main

# Workflow-level token scope: read-only access to repository contents.
permissions:
  contents: read

# Every job runs on ubuntu-latest with a read-only contents token.
# Script-based jobs check out the repository and run one script from tests/.
# The two agent jobs run the action from the repository root (uses: ./).
jobs:
  # Runs tests/test-local.sh.
  test-prompt-sanitization:
    name: Prompt Sanitization Tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938  # v4.2.0
      - name: Run prompt sanitization tests
        run: |
          cd tests
          chmod +x test-local.sh
          ./test-local.sh

  # Runs tests/test-output-extraction.sh.
  test-output-extraction:
    name: Output Extraction Tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938  # v4.2.0
      - name: Run output extraction tests
        run: |
          cd tests
          chmod +x test-output-extraction.sh
          ./test-output-extraction.sh

  # Runs tests/test-job-summary.sh.
  test-job-summary:
    name: Job Summary Format Tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938  # v4.2.0
      - name: Run job summary tests
        run: |
          cd tests
          chmod +x test-job-summary.sh
          ./test-job-summary.sh

  # Runs tests/test-security.sh.
  test-security:
    name: Security Tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938  # v4.2.0
      - name: Run security tests
        run: |
          cd tests
          chmod +x test-security.sh
          ./test-security.sh

  # Runs tests/test-exploits.sh.
  test-exploits:
    name: Exploit Tests
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938  # v4.2.0
      - name: Run exploit tests
        run: |
          cd tests
          chmod +x test-exploits.sh
          ./test-exploits.sh

  # End-to-end run of the action with the agentcatalog/pirate agent, followed
  # by validation of the reported exit code and the produced output file.
  # All steps after the fork check are skipped for fork pull requests.
  test-pirate-agent:
    name: Pirate Agent Test
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Check if fork PR
        id: fork-check
        env:
          # Context values are handed to the script through the environment
          # instead of being expanded inline, so they are never parsed as
          # shell source.
          EVENT_NAME: ${{ github.event_name }}
          # Default to an empty string to handle edge cases
          # (deleted branches, malformed events).
          HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name || '' }}
          BASE_REPO: ${{ github.repository }}
        run: |
          # A fork PR is a pull_request event whose head repository is known
          # and differs from the repository running this workflow.
          if [[ "$EVENT_NAME" == "pull_request" && -n "$HEAD_REPO" && "$HEAD_REPO" != "$BASE_REPO" ]]; then
            echo "⏭️ Skipping - fork PR (secrets not available)"
            echo "is_fork=true" >> "$GITHUB_OUTPUT"
          else
            echo "is_fork=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Checkout code
        if: steps.fork-check.outputs.is_fork != 'true'
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938  # v4.2.0
      - name: Run test
        if: steps.fork-check.outputs.is_fork != 'true'
        id: pirate
        uses: ./
        with:
          agent: agentcatalog/pirate
          prompt: "What do we ship today?"
          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
      - name: Validate output and exit code
        if: steps.fork-check.outputs.is_fork != 'true'
        env:
          # Action outputs are read from the environment rather than being
          # interpolated into the script text.
          OUTPUT_FILE: ${{ steps.pirate.outputs.output-file }}
          EXIT_CODE: ${{ steps.pirate.outputs.exit-code }}
        run: |
          # Check that exit code is 0 (success)
          if [ "$EXIT_CODE" != "0" ]; then
            echo "❌ Agent failed with exit code: $EXIT_CODE"
            exit 1
          fi
          echo "✅ Agent completed successfully with exit code 0"

          # Check that output file exists
          if [ ! -f "$OUTPUT_FILE" ]; then
            echo "❌ Output file not found: $OUTPUT_FILE"
            exit 1
          fi
          echo "✅ Output file found: $OUTPUT_FILE"

          # Display the output for debugging
          echo "--- Agent Output ---"
          cat "$OUTPUT_FILE"
          echo "--- End Output ---"

          # Check that output is clean (no agent markers or metadata in output).
          # A leftover marker only produces a warning; it does not fail the job.
          if grep -qF -- "--- Agent: root ---" "$OUTPUT_FILE"; then
            echo "⚠️ Output still contains '--- Agent: root ---' marker (not fully cleaned)"
          fi

          # Check that output doesn't contain log metadata
          if grep -qE "^(time=|level=)" "$OUTPUT_FILE"; then
            echo "❌ Output contains log metadata (time= or level=) - cleaning failed"
            exit 1
          fi
          echo "✅ Output is clean (no log metadata)"

          # Check that there is actual content (non-empty, non-whitespace).
          # Lines consisting solely of whitespace are discarded as well as empty
          # ones; '|| true' keeps a no-match grep from aborting the step in
          # shells that enable pipefail, so the explicit message below is used.
          CONTENT=$(grep -v '^[[:space:]]*$' "$OUTPUT_FILE" | head -n 5 || true)
          if [ -z "$CONTENT" ]; then
            echo "❌ No content found in output file"
            exit 1
          fi
          echo "✅ Found agent response content"
          echo "Response preview: $(echo "$CONTENT" | head -n 1)"

  # Negative test: running the action with a nonexistent agent must fail,
  # either through a non-zero exit code or a "failed to pull" message in the
  # output file.
  # NOTE(review): unlike test-pirate-agent this job has no fork-PR guard, and
  # an empty exit-code output is treated as a failure - confirm that a run
  # without the secret still fails for the intended reason.
  test-invalid-agent:
    name: Invalid Agent Test
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938  # v4.2.0
      - name: Test should fail on invalid agent
        id: invalid-agent
        # The failure is expected; let the job continue to the verification step.
        continue-on-error: true
        uses: ./
        with:
          agent: agentcatalog/nonexistent
          prompt: "This should fail"
          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
      - name: Verify invalid agent failed
        env:
          # Action outputs are read from the environment rather than being
          # interpolated into the script text.
          OUTPUT_FILE: ${{ steps.invalid-agent.outputs.output-file }}
          EXIT_CODE: ${{ steps.invalid-agent.outputs.exit-code }}
        run: |
          # Check exit code OR check for error in output (cagent may exit 0 even on pull failure)
          if [ "$EXIT_CODE" = "0" ]; then
            # Exit code is 0, check if output contains error message
            if [ -f "$OUTPUT_FILE" ] && grep -q "failed to pull" "$OUTPUT_FILE"; then
              echo "✅ Invalid agent correctly failed (error in output)"
            else
              echo "❌ Invalid agent should have failed but succeeded with no error"
              exit 1
            fi
          else
            echo "✅ Invalid agent correctly failed (non-zero exit code)"
          fi