import BasePortfolioEntry from "./BasePortfolioEntry";

import viphy_logo from "../../Images/VIPHY/intro.png";
import viphy_pipeline from "../../Images/VIPHY/pipeline.png";



/**
 * Portfolio entry for the VIPHY research project.
 *
 * The static fields describe the entry at class level (route path, title,
 * cover image, short brief and keywords); the constructor fills in the
 * per-instance research content (title, highlights, abstract, images and
 * citations).
 *
 * NOTE(review): how these fields are consumed is decided by
 * BasePortfolioEntry and its callers, which are not visible in this file.
 */
class VIPHY extends BasePortfolioEntry {
    static mypath = "viphy";
    static page_title = "VIPHY: Probing \"Visible\" Physical Commonsense Knowledge";
    static page_image = viphy_logo;
    static page_briefs = "Build an automatic pipeline to calibrate and probe visible aspects of commonsense in visual language models.";

    static page_keywords = ["Weak Supervision", "Visual Language Models", "CommonSense", "NLP", "Computer Vision"];

    /**
     * @param {object} props - Forwarded unchanged to BasePortfolioEntry.
     */
    constructor(props) {
        super(props);
        this.research_title = VIPHY.page_title;

        // The single highlight is the same sentence as the page brief; reuse
        // the static field so the two can never drift apart.
        this.research_highlights = [
            VIPHY.page_briefs,
        ];

        // The abstract is kept as short fragments WITHOUT trailing spaces and
        // joined with a single space. The previous "+"-concatenation silently
        // glued words together wherever a trailing space was forgotten
        // (e.g. "informationthat", "significantlyoutperforms").
        this.research_details = [
            [
                "Vision-language models (VLMs) have shown",
                "remarkable performance on visual reasoning",
                "tasks (e.g. attributes, location). While such",
                "tasks measure the requisite knowledge to",
                "ground and reason over a given visual instance,",
                "they do not, however, measure the ability of",
                "VLMs to retain and generalize such knowledge.",
                "In this work, we evaluate VLMs’ ability to acquire",
                "“visible” physical knowledge – the information",
                "that is easily accessible from images of",
                "static scenes, particularly along the dimensions",
                "of object color, size, and space. We build an",
                "automatic pipeline to derive a comprehensive",
                "knowledge resource for calibrating and probing",
                "these models. Our results indicate a severe gap",
                "between model and human performance across",
                "all three dimensions. Furthermore, we demonstrate",
                "that a caption pretrained LM significantly",
                "outperforms VLMs on both size and spatial",
                "tasks – highlighting that despite sufficient access",
                "to ground language with visual modality,",
                "they struggle to retain such knowledge.",
            ].join(" "),
        ];

        this.images = [
            viphy_logo,
            viphy_pipeline,
        ];

        // The cited title is taken from page_title so the quoting around
        // "Visible" matches the page heading (it was previously garbled as
        // `Probing" Visible" `).
        this.citations = [
            `Singh, Shikhar, Ehsan Qasemi, and Muhao Chen. "${VIPHY.page_title}." ` +
            "arXiv preprint arXiv:2209.07000 (2022).",
        ];
    }

}

export default VIPHY;