Implement GitHub Action to auto-label issues with provider keywords (#13537)

* Add auto issue labelling for llm_translation

* Add logging

* make the formatting better

* make the formatting better

* make the formatting better

* make the formatting better
This commit is contained in:
Sameer Kankute
2025-08-13 09:48:20 +05:30
committed by GitHub
parent e51c15705c
commit 74496eefea
2 changed files with 197 additions and 0 deletions

133
.github/scripts/scan_keywords.py vendored Normal file
View File

@@ -0,0 +1,133 @@
#!/usr/bin/env python3
import json
import os
import sys
import urllib.request
import urllib.error
def read_event_payload() -> dict:
    """Load the webhook event that triggered the current workflow run.

    The file location is taken from the ``GITHUB_EVENT_PATH`` environment
    variable.

    Returns:
        The decoded JSON document, or an empty dict when the variable is
        unset/empty or the path it names does not exist.
    """
    payload_path = os.environ.get("GITHUB_EVENT_PATH")
    payload_available = bool(payload_path) and os.path.exists(payload_path)
    if payload_available:
        with open(payload_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    # Nothing to read: callers treat an empty dict as "no event".
    return {}
def get_issue_text(event: dict) -> tuple[str, str, int, str, str]:
    """Extract the issue fields used by this script from an event payload.

    Missing or ``None`` values are replaced by neutral defaults so callers
    never have to deal with ``None``.

    Args:
        event: Decoded event payload; the ``issue`` key is optional.

    Returns:
        ``(title, body, number, html_url, author)``. Title, body and author
        are whitespace-stripped; ``number`` defaults to ``0`` and
        ``html_url`` to ``""`` (returned as-is, without stripping).
    """
    issue_data = event.get("issue") or {}
    user_data = issue_data.get("user") or {}

    def _clean(value) -> str:
        # Collapse None/empty to "" before trimming surrounding whitespace.
        return (value or "").strip()

    return (
        _clean(issue_data.get("title")),
        _clean(issue_data.get("body")),
        issue_data.get("number") or 0,
        issue_data.get("html_url") or "",
        _clean(user_data.get("login")),
    )
def detect_keywords(text: str, keywords: list[str]) -> list[str]:
    """Return the keywords that occur in ``text``, ignoring case.

    Matching is a plain substring test, so a keyword also matches when it is
    embedded in a longer word.

    Args:
        text: Text to search.
        keywords: Candidate keywords; surrounding whitespace is ignored and
            blank entries are skipped.

    Returns:
        The matching keywords (stripped, original casing), in input order,
        with exact duplicates removed.
    """
    haystack = text.lower()
    trimmed = (candidate.strip() for candidate in keywords)
    hits = [word for word in trimmed if word and word.lower() in haystack]
    # dict preserves insertion order, giving an order-stable dedupe.
    return list(dict.fromkeys(hits))
def send_webhook(webhook_url: str, payload: dict) -> None:
    """POST ``payload`` as JSON to ``webhook_url`` on a best-effort basis.

    Delivery failures are reported on stderr and never raised, so a broken
    or misconfigured webhook cannot fail the calling script.

    Args:
        webhook_url: Destination URL; nothing is sent when it is empty.
        payload: JSON-serialisable message body.
    """
    if not webhook_url:
        return
    data = json.dumps(payload).encode("utf-8")
    try:
        # Request() parses the URL eagerly and raises ValueError for a
        # malformed one (e.g. missing scheme), so it has to be built inside
        # the try block to keep this function best-effort.
        req = urllib.request.Request(
            webhook_url,
            data=data,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            resp.read()
    except urllib.error.HTTPError as e:
        # HTTPError subclasses URLError, so it must be handled first.
        print(f"Webhook HTTP error: {e.code} {e.reason}", file=sys.stderr)
    except urllib.error.URLError as e:
        print(f"Webhook URL error: {e.reason}", file=sys.stderr)
    except Exception as e:
        # Top-level boundary for a notification side channel: log and move on.
        print(f"Webhook unexpected error: {e}", file=sys.stderr)
def _excerpt(text: str, max_len: int = 400) -> str:
    """Return ``text`` shortened to at most ``max_len`` characters.

    Text that already fits is returned unchanged (original formatting is
    kept). Longer text is cut and terminated with a single ellipsis
    character so the reader can tell it was truncated; the result is then
    exactly ``max_len`` characters long.

    Args:
        text: Text to shorten; ``None``/empty yields ``""``.
        max_len: Maximum length of the result. Non-positive values yield
            ``""``.
    """
    # A non-positive limit would otherwise turn into a negative slice bound
    # and return almost the whole text.
    if not text or max_len <= 0:
        return ""
    if len(text) <= max_len:
        return text
    # Reserve one character for the ellipsis (U+2026).
    return text[: max_len - 1] + "\u2026"
def _escape_slack_text(text: str) -> str:
    """Escape the characters Slack reserves for message control sequences.

    ``&`` is replaced first so the entities produced for ``<`` and ``>`` are
    not escaped a second time.
    """
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def main() -> int:
    """Scan the triggering issue for provider keywords and report the result.

    Steps:
      1. Load the event payload and read the issue title/body.
      2. Match them against the comma-separated ``KEYWORDS`` environment
         variable (or the built-in defaults).
      3. Append ``found`` and ``matches`` to the file named by
         ``GITHUB_OUTPUT`` when that variable is set.
      4. When keywords were found and ``PROVIDER_ISSUE_WEBHOOK_URL`` is set,
         post a notification to that webhook.

    Returns:
        Always ``0``; a missing payload only produces a warning.
    """
    event = read_event_payload()
    if not event:
        print("::warning::No event payload found; exiting without labeling.")
        return 0

    # Read issue details (the issue number is not needed here).
    title, body, _number, html_url, author = get_issue_text(event)
    combined_text = f"{title}\n\n{body}".strip()

    # Keywords from env or defaults. Blank entries are dropped so a value
    # made only of commas/whitespace falls back to the defaults instead of
    # silently disabling detection.
    default_keywords = ["azure", "openai", "bedrock", "vertexai", "vertex ai", "anthropic"]
    configured = [k.strip() for k in os.environ.get("KEYWORDS", "").split(",")]
    keywords = [k for k in configured if k] or default_keywords

    matches = detect_keywords(combined_text, keywords)
    found = bool(matches)

    # Emit step outputs.
    github_output = os.environ.get("GITHUB_OUTPUT")
    if github_output:
        with open(github_output, "a", encoding="utf-8") as fh:
            fh.write(f"found={'true' if found else 'false'}\n")
            fh.write(f"matches={','.join(matches)}\n")

    # Optional webhook notification.
    webhook_url = os.environ.get("PROVIDER_ISSUE_WEBHOOK_URL", "").strip()
    if found and webhook_url:
        # The message uses Slack-style markup (*bold*, <url|label>). Title,
        # body and author come from whoever opened the issue, so escape them
        # to stop injected mentions or spoofed links.
        safe_title = _escape_slack_text(title)
        safe_author = _escape_slack_text(author)
        title_part = f"*{safe_title}*" if title else "New issue"
        # Truncate first, then escape, so an entity is never cut in half.
        body_preview = _escape_slack_text(_excerpt(body))
        preview_block = f"\n{body_preview}" if body_preview else ""
        payload = {
            "text": (
                f"New issue 🚨\n"
                f"{title_part}\n\n{preview_block}\n"
                f"<{html_url}|View issue>\n"
                f"Author: {safe_author}"
            )
        }
        send_webhook(webhook_url, payload)

    # Print a short log line for the Actions UI.
    if found:
        print(f"Detected provider keywords: {', '.join(matches)}")
    else:
        print("No provider keywords detected.")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())

View File

@@ -0,0 +1,64 @@
# Labels newly opened issues that mention an LLM provider keyword.
name: Issue Keyword Labeler
# Runs only when an issue is opened.
on:
  issues:
    types:
      - opened
jobs:
  scan-and-label:
    runs-on: ubuntu-latest
    # issues: write is needed to create the label and attach it to the issue;
    # contents: read is needed for the checkout step.
    permissions:
      issues: write
      contents: read
    steps:
      # The scan script lives in the repository, so it must be checked out.
      - name: Checkout code
        uses: actions/checkout@v4
      # Runs the scanner; it writes `found` and `matches` to GITHUB_OUTPUT,
      # which later steps read through `steps.scan.outputs`.
      - name: Scan for provider keywords
        id: scan
        env:
          # Optional: when set, the script also posts a notification there.
          PROVIDER_ISSUE_WEBHOOK_URL: ${{ secrets.PROVIDER_ISSUE_WEBHOOK_URL }}
          # Comma-separated keyword list read by the script.
          KEYWORDS: azure,openai,bedrock,vertexai,vertex ai,anthropic
        run: python3 .github/scripts/scan_keywords.py
      # Creates the label when the lookup returns 404; any other error is
      # re-thrown and fails the step.
      - name: Ensure label exists
        if: steps.scan.outputs.found == 'true'
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const labelName = 'llm translation';
            try {
              await github.rest.issues.getLabel({
                owner: context.repo.owner,
                repo: context.repo.repo,
                name: labelName
              });
            } catch (error) {
              if (error.status === 404) {
                await github.rest.issues.createLabel({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  name: labelName,
                  color: 'c1ff72',
                  description: 'Issues related to LLM provider translation/mapping'
                });
              } else {
                throw error;
              }
            }
      # Attaches the label to the issue that triggered this run.
      - name: Add label to the issue
        if: steps.scan.outputs.found == 'true'
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.addLabels({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              labels: ['llm translation']
            });