WARNING: THIS SITE IS A MIRROR OF GITHUB.COM / IT CANNOT LOGIN OR REGISTER ACCOUNTS / THE CONTENTS ARE PROVIDED AS-IS / THIS SITE ASSUMES NO RESPONSIBILITY FOR ANY DISPLAYED CONTENT OR LINKS / IF YOU FOUND SOMETHING MAY NOT GOOD FOR EVERYONE, CONTACT ADMIN AT ilovescratch@foxmail.com
Skip to content

marimo+quarto+duckdb dependency error (even duckdb is a declared dependency) #22

@kascesar

Description

@kascesar

Hi! Hope you may help.

I am a user of Quarto to publish on confluence, that means quarto is indispensable for me.
That marimo is so awesome, want to integrate it to my workflow to publish my research and share with my work team.

ill show my source code then ill show the error. First let explain the problem and what thing i do to get the error.

Opening marimo with sandbox for manage the dependencies (using uv as backend)

marimo edit --sandbox FRD-estudio-nuevo.py

code

show code
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "duckdb==1.3.0",
#     "marimo",
#     "matplotlib==3.10.3",
#     "numpy",
#     "pandas==2.2.3",
#     "plotly==6.1.2",
#     "polars",
#     "pyarrow",
#     "seaborn==0.13.2",
#     "sqlglot",
# ]
# ///

import marimo

__generated_with = "0.13.15"
app = marimo.App(width="medium", sql_output="pandas")


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        f"""
    # Nuevos lineamientos para *FRD*

    ## Introducción

    Para este proyecto, es necesario construir una nueva base sólida en la que basar análisis exploratorio; Algunas de las definiciones deben ser creadas basandose en ciertas preguntas lógicas:

    - ¿Que es un mal uso de la Banda?
    - ¿Que es riegos?
    """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        f"""
    ## Mal uso de la Banda

    *def*: Se define "*mal uso de la banda*" aquellas interacciones con la banda las cuales tengan por finalidad: 

    * Engañar al sistema forzandolo a medir estados alterados
    * Remover el sistema sin motivo de fuerza aparente entorpeciendo las mediciones
    * Utilizar el sistema en una proporcion bastante por debajo de la media de utilizacion del resto de *Operadores*
      {mo.callout(
        mo.md(  
          '''
          En este caso, es importante hablar de **proporción de tiempo**  y **no cantidad de tiempo**,
          ya que la duración de un turno puede variar de manera bien justificada.
          '''
      ),
      kind="info"

     )}
    """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        f"""
    ## Riesgo

    Podemos referirnos al riesgo en diferentes maneras, en este apartado nos referimos al riesgo en el contexto donde se asume:

    - *Operador* ha demostrado que utiliza adecuadamente el sistema.
    - El tiempo de utilizacion es representativo.
    - El funcionamiento de la banda está en parametros normales
    """
    )
    return


@app.cell
def _():
    import marimo as mo
    import duckdb
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    import datetime
    from datetime import timedelta

    sns.set_context("notebook")

    fatiguelog = duckdb.connect("~/fatiguelog")
    return fatiguelog, mo, pd, plt, sns, timedelta


@app.cell
def _(fatiguelog, mo):
    df_shifts = mo.sql(
        f"""
        SELECT DISTINCT
            ShiftStartTimestampUtc,
            ShiftEndTimestampUtc
        FROM fatiguelog.fatiguelog
        ORDER BY ShiftStartTimestampUtc;
        """,
        output=False,
        engine=fatiguelog
    )
    return (df_shifts,)


@app.cell
def _(df_shifts, pd):
    df_shifts['ShiftStartTimestampUtc'] = (
        pd.to_datetime(df_shifts['ShiftStartTimestampUtc'], utc=True)
        #.dt.tz_convert('America/Santiago')
        .dt.strftime('%Y-%m-%d %H:%M:%S')
    )
    df_shifts['ShiftEndTimestampUtc'] = (
        pd.to_datetime(df_shifts['ShiftEndTimestampUtc'], utc=True)
        #.dt.tz_convert('America/Santiago')
        .dt.strftime('%Y-%m-%d %H:%M:%S')
    )
    df_shifts.sort_values(by='ShiftStartTimestampUtc', inplace=True, ascending=False)
    return


@app.cell
def _(df_shifts, mo):
    turno = mo.ui.dropdown(
        options=dict(
            zip(df_shifts['ShiftStartTimestampUtc'], df_shifts['ShiftEndTimestampUtc'])),
            label="Turno: ",
            value=df_shifts['ShiftStartTimestampUtc'].iloc[0]
    )

    return (turno,)


@app.cell
def _(mo):
    utilization = mo.ui.slider(start=1, stop=11, label="", value=5)
    return (utilization,)


@app.cell
def _(mo):
    fatigues_types = [
        "No Level",
        "Fatigue Level 2",
        "Fatigue Level 3",
        "Fatigue Level 3+",
        "Fatigue Level 4",
        "LifeBand Removed",
        "Poorly fitted cap",
        "LifeBand Charging",
        "LifeBand Charger Detached",
        "No LifeBand Connected",
        "Equipment Off"
    ]
    fatigues_selected = mo.ui.multiselect(options=fatigues_types, value=fatigues_types)

    return (fatigues_selected,)


@app.cell
def _(fatiguelog, mo, turno):
    fatiguelog_name = mo.sql(
        f"""
        SELECT
            Site,
            LifebandId,
            OperatorEid,
            OperatorName,
            ShiftStartTimestampUtc::TIMESTAMP AS ShiftStartTimestampUtc,
            ShiftEndTimestampUtc::TIMESTAMP AS ShiftEndTimestampUtc,
            FatigueStatusName,
            MAX(FatigueLogStatusTimestampUtc::TIMESTAMP) - MIN(FatigueLogStatusTimestampUtc::TIMESTAMP) AS utilization,
            COUNT(*) AS FatigueStatusCount
        FROM
            fatiguelog.fatiguelog
        WHERE
            ShiftStartTimestampUtc::TIMESTAMP >= '{turno.selected_key}'::TIMESTAMP AND
            ShiftEndTimestampUtc::TIMESTAMP <= '{turno.value}'::TIMESTAMP
        GROUP BY
            Site,
            LifebandId,
            OperatorEid,
            OperatorName,
            ShiftStartTimestampUtc,
            ShiftEndTimestampUtc,
            FatigueStatusName
        ORDER BY
            Site,
            OperatorEid,
            ShiftStartTimestampUtc;
        """,
        output=False,
        engine=fatiguelog
    )
    return (fatiguelog_name,)


@app.cell
def _(fatiguelog_name, fatigues_selected, sns, timedelta, utilization):
    _df = fatiguelog_name[fatiguelog_name["utilization"] > timedelta(hours=utilization.value)]
    _df = _df[_df["FatigueStatusName"].isin(fatigues_selected.value)]
    if _df.shape[0] > 0:
        g_pturns = sns.catplot(
            _df,
            y="FatigueStatusName",
            x="FatigueStatusCount",
            hue="FatigueStatusName",
            kind="boxen",
            height=5,
            aspect=3,
            order=fatigues_selected.value,
        )

        # g.fig.suptitle("Distribución de mediciones por nivel por turno", fontsize=20);
        g_pturns.fig.subplots_adjust(top=0.9)
    else:
        g_pturns = None
    return (g_pturns,)


@app.cell
def explicaxion_box_plot(fatigues_selected, mo):
    mo.md(
        f"""
    ## Gráfico *Box plot* de medición por tipo nivel turno

    Se presenta el *Box Plot* de los niveles medididos de cada turno:

    {"".join([f"* {i}\n" for i in fatigues_selected.value])}
    """
    )
    return


@app.cell
def _(fatigues_selected, g_pturns, mo, turno, utilization):
    mo.md(
        f"""
    ***

    ### Distribucion de lecturas por turno

    En esta figura podemos apreciar como se distribuyen las lecturas de la banda
    por turno (podemos establecer el parametro de utilización)

    - {turno}  ➡️  {turno.value}
    - lecturas: {fatigues_selected}
    - Utilización (>={utilization.value}) {utilization}

    {mo.as_html(g_pturns) if g_pturns is not None else ""}
    """
    )
    return


@app.cell
def _(fatiguelog, fatigues_selected, mo, sns, timedelta, utilization):
    _df = mo.sql("""
    SELECT
        Site,
        LifebandId,
        OperatorEid,
        OperatorName,
        ShiftStartTimestampUtc::TIMESTAMP AS ShiftStartTimestampUtc,
        ShiftEndTimestampUtc::TIMESTAMP AS ShiftEndTimestampUtc,
        FatigueStatusName,
        COUNT(*) AS FatigueStatusCount,
        MAX(FatigueLogStatusTimestampUtc::TIMESTAMP) - MIN(FatigueLogStatusTimestampUtc::TIMESTAMP) AS utilization
    FROM
        fatiguelog.fatiguelog
    GROUP BY
        Site,
        LifebandId,
        OperatorEid,
        OperatorName,
        ShiftStartTimestampUtc,
        ShiftEndTimestampUtc,
        FatigueStatusName
    ORDER BY
        Site,
        OperatorEid,
        ShiftStartTimestampUtc;
    """, engine=fatiguelog)

    _df = _df[_df["utilization"] > timedelta(hours=utilization.value)]
    _df = _df[_df["FatigueStatusName"].isin(fatigues_selected.value)]
    fatigues_selected.value
    if _df.shape[0] > 0:
        _g = sns.catplot(
            _df[_df["utilization"] > timedelta(hours=utilization.value)],
            y="FatigueStatusName",
            x="FatigueStatusCount",
            hue="FatigueStatusName",
            kind="boxen",
            height=5,
            aspect=3,
            order=fatigues_selected.value,
        )

        # g.fig.suptitle("Distribución de mediciones por nivel por turno", fontsize=20);
        _g.fig.subplots_adjust(top=0.9)
    else:
        _g = None

    mo.md(
    f"""
    ***

    ### ¿Qué ocurre cuando comparamos todos los los turnos?

    En esta figura podemos apreciar como se distribuyen las lecturas de la banda
    por turno (podemos establecer el parametro de utilización)

    - lecturas: {fatigues_selected}
    - Utilización (>={utilization.value}) {utilization}

    {mo.as_html(_g) if _g is not None else ""}

    {
    mo.callout("Se aprecia una fuerte presencia de mediciones *No Life Band Connected*.", kind='info')
    }

    """
    )
    return


@app.cell
def _(mo):
    mo.md(
        r"""
    # Estudio *frecuencia* de lecturas por banda 

    En este apartado se pretende estudiar el comportamiento de las lecturas para cada banda
    """
    )
    return


@app.cell
def _(fatiguelog, mo, plt, sns):
    lecturas_bandas = mo.sql("""
        SELECT
            LifebandId,
            time_bucket(
                INTERVAL 60 MINUTE, -- bucket width
                FatigueLogStatusTimestampUtc::TIMESTAMP,
                INTERVAL 0 MINUTE -- offset
            ) AS datetime,
            count(*) AS Lecturas
        FROM
            fatiguelog.fatiguelog
        GROUP BY ALL
        ORDER BY 3
        """,
        engine=fatiguelog
    )
    # lecturas_bandas.dropna(subset=["LifebandId"], inplace=True)
    _f = plt.figure(figsize=(10, 5))
    _gs = _f.add_gridspec(1, 2)
    _ax = _f.add_subplot(_gs[0, 0])
    _g = sns.scatterplot(data=lecturas_bandas, x="datetime", y="Lecturas", hue="Lecturas", legend=False, size=5)
    plt.xticks(rotation=30)

    _ax = _f.add_subplot(_gs[0, 1])
    _rigth = sns.boxplot(data=lecturas_bandas, y="Lecturas", legend=False)
    _rigth.set(ylim=(0, 250))


    mo.md(f"""

    ## Lecturas de banda por hora

    {mo.callout("""
    Aquí presentamos como las bandas leen por hora, la idea es encontrar bandas específicas que tengan algun comportamiento muy por fuera de lo normal.

    """)}
    {mo.as_html(_f)}
    """)


    return (lecturas_bandas,)


@app.cell
def _(lecturas_bandas, mo):
    mo.md(
        f"""
    ### Tabla con las lecturas por horas de cada banda
    {mo.ui.dataframe(lecturas_bandas)}
    """
    )
    return


@app.cell(hide_code=True)
def _(fatiguelog, mo):
    top_k_bandas = mo.sql(
        f"""
        WITH hourly_readings AS (
            SELECT
                LifebandId,
                time_bucket(INTERVAL '60 MINUTE', FatigueLogStatusTimestampUtc::TIMESTAMP) AS datetime,
                COUNT(*) AS Lecturas
            FROM
                fatiguelog.fatiguelog
            GROUP BY
                LifebandId,
                time_bucket(INTERVAL '60 MINUTE', FatigueLogStatusTimestampUtc::TIMESTAMP)
        ),
        ranked_bands AS (
            SELECT
                LifebandId,
                datetime,
                Lecturas,
                -- Asigna un ranking por hora (mayor lecturas = mejor posición)
                DENSE_RANK() OVER (
                    PARTITION BY datetime 
                    ORDER BY Lecturas DESC
                ) AS hourly_rank
            FROM hourly_readings
        )
        SELECT
            LifebandId,
            COUNT(*) AS TotalHorasTop
        FROM ranked_bands
        WHERE 
            hourly_rank <= 10
        GROUP BY LifebandId
        ORDER BY TotalHorasTop DESC
        LIMIT 10;
        """,
        engine=fatiguelog
    )
    return (top_k_bandas,)


@app.cell
def _(mo, top_k_bandas):
    table_top_k = mo.ui.table(top_k_bandas, initial_selection=list(range(top_k_bandas.shape[0])))

    return (table_top_k,)


@app.cell
def _(lecturas_bandas, mo, plt, sns, table_top_k, top_k_bandas):
    _f = plt.figure(figsize=(10, 5))
    _gs = _f.add_gridspec(1, 2)
    _ax = _f.add_subplot(_gs[0, 0])
    sns.lineplot(
        lecturas_bandas[lecturas_bandas["LifebandId"].isin(table_top_k.value["LifebandId"])],
        x="datetime", y="Lecturas"
    )
    plt.xticks(rotation=30)
    plt.title("Top k bandas que mas leen")

    _ax = _f.add_subplot(_gs[0, 1])
    sns.lineplot(
        lecturas_bandas[~ lecturas_bandas["LifebandId"].isin(top_k_bandas["LifebandId"])],
        x="datetime", y="Lecturas"
    )
    plt.xticks(rotation=30)
    plt.title("Lecturas sin bandas top 10")


    mo.md(f"""
    ## Top bandas con reiterado top de lecturas

    Ahora mostramos el top 10 de bandas con mayor tasa de lecturas en el tiempo

    {table_top_k}

    Podemos ver la comparacion directa en la siguiente figura:

    {mo.as_html(_f)}
    """)
    return


if __name__ == "__main__":
    app.run()

exporto to a qmd (quarto md)

marimo export md FRD-estudio-nuevo.py -o index.qmd

respond all "yes" for every prompt question.

quarto project

then create quarto project

quarto create project 

chose default named as test then cd in folder test

now, install the marimo-quarto extension:

quarto add marimo-team/quarto-marimo

issue part

Now with all prepared

uvx --with marimo --from quarto-cli quarto preview

gave this

show log

Watching files for changes
Browse at http://localhost:6733/
Installed 23 packages in 20ms
Bytecode compiled 1589 files in 466ms
MarimoExceptionRaisedError: No module named 'duckdb'
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError): 
Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.GET: /

on the qmd file i removed the # converting this

# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "duckdb==1.3.0",
#     "marimo",
#     "matplotlib==3.10.3",
#     "numpy",
#     "pandas==2.2.3",
#     "plotly==6.1.2",
#     "polars",
#     "pyarrow",
#     "seaborn==0.13.2",
#     "sqlglot",
# ]
# ///

into

# /// script
requires-python = ">=3.12"
 dependencies = [
     "duckdb==1.3.0",
     "marimo",
     "matplotlib==3.10.3",
     "numpy",
     "pandas==2.2.3",
     "plotly==6.1.2",
     "polars",
     "pyarrow",
     "seaborn==0.13.2",
     "sqlglot",
 ]
# ///

note that the error are MarimoExceptionRaisedError: No module named 'duckdb', so i do not know why.

Hope the information are usefull for you.

Thanks!


Actualization

Solve the problem MANUALLY move de import code to top of qmd file.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions