You're viewing a demo portfolio

Join the waitlist
PRSM

multimodal_eval_guide

Active

Tool of IA-QA — 130+ QA & Dev Tools for AI Agents

declared in 1.0.0

Unified tool for multimodal AI evaluation: set action=guide for reference thresholds/interpretation (CLIP, FID, VQA), or set action=clip_score / fid_score / vqa_accuracy / pipeline to compute real metrics via HuggingFace Inference API and VLM BYOK calls. One tool for both reference and computation.

Parameters schema

{
  "type": "object",
  "properties": {
    "fid": {
      "type": "object",
      "description": "[pipeline] {real_images, generated_images} for FID.",
      "additionalProperties": true
    },
    "vqa": {
      "type": "object",
      "description": "[pipeline] VQA config object (same inputs as vqa_accuracy).",
      "additionalProperties": true
    },
    "clip": {
      "type": "object",
      "description": "[pipeline] {image_url, text} for CLIP.",
      "additionalProperties": true
    },
    "text": {
      "type": "string",
      "description": "[clip_score only] Text description to compare against the image."
    },
    "model": {
      "type": "string",
      "description": "[vqa_accuracy] VLM model ID (default: gpt-4o)."
    },
    "score": {
      "type": "number",
      "description": "[guide only] Optional score value to interpret."
    },
    "action": {
      "enum": [
        "guide",
        "clip_score",
        "fid_score",
        "vqa_accuracy",
        "pipeline"
      ],
      "type": "string",
      "description": "guide (default) = reference thresholds/interpretation. clip_score/fid_score/vqa_accuracy = compute that metric. pipeline = run all three."
    },
    "metric": {
      "enum": [
        "clip_score",
        "fid",
        "vqa_accuracy",
        "all"
      ],
      "type": "string",
      "description": "[guide only] Metric to explain."
    },
    "api_key": {
      "type": "string",
      "description": "[vqa_accuracy] Your API key for the provider (BYOK)."
    },
    "image_url": {
      "type": "string",
      "description": "[clip_score/vqa_accuracy] Public URL of the image."
    },
    "test_cases": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "question": {
            "type": "string"
          },
          "accepted_answers": {
            "type": "array",
            "items": {
              "type": "string"
            }
          }
        }
      },
      "description": "[vqa_accuracy] Array of {question, accepted_answers} objects."
    },
    "real_images": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "[fid_score] Array of real image URLs."
    },
    "image_base64": {
      "type": "string",
      "description": "[clip_score/vqa_accuracy] Base64-encoded image data."
    },
    "system_prompt": {
      "type": "string",
      "description": "[vqa_accuracy] Optional system prompt."
    },
    "image_mime_type": {
      "type": "string",
      "description": "[clip_score/vqa_accuracy] MIME type for base64 image."
    },
    "generated_images": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "description": "[fid_score] Array of generated image URLs."
    }
  }
}

What this tool wraps · 0 endpoints

min confidence 0.70 0.50

No endpoints wrapped at confidence ≥ 0.70.

Parent server

IA-QA — 130+ QA & Dev Tools for AI Agents

https://github.com/jcjamet/ia-qa

1/7 registries
View full server →