-
Notifications
You must be signed in to change notification settings - Fork 8
Description
Hi! Hope you may help.
I am a user of Quarto to publish on confluence, that means quarto is indispensable for me.
That marimo is so awesome, want to integrate it to my workflow to publish my research and share with my work team.
ill show my source code then ill show the error. First let explain the problem and what thing i do to get the error.
Opening marimo with sandbox for manage the dependencies (using uv as backend)
marimo edit --sandbox FRD-estudio-nuevo.pycode
show code
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "duckdb==1.3.0",
# "marimo",
# "matplotlib==3.10.3",
# "numpy",
# "pandas==2.2.3",
# "plotly==6.1.2",
# "polars",
# "pyarrow",
# "seaborn==0.13.2",
# "sqlglot",
# ]
# ///
import marimo
__generated_with = "0.13.15"
app = marimo.App(width="medium", sql_output="pandas")
@app.cell(hide_code=True)
def _(mo):
mo.md(
f"""
# Nuevos lineamientos para *FRD*
## Introducción
Para este proyecto, es necesario construir una nueva base sólida en la que basar análisis exploratorio; Algunas de las definiciones deben ser creadas basandose en ciertas preguntas lógicas:
- ¿Que es un mal uso de la Banda?
- ¿Que es riegos?
"""
)
return
@app.cell(hide_code=True)
def _(mo):
mo.md(
f"""
## Mal uso de la Banda
*def*: Se define "*mal uso de la banda*" aquellas interacciones con la banda las cuales tengan por finalidad:
* Engañar al sistema forzandolo a medir estados alterados
* Remover el sistema sin motivo de fuerza aparente entorpeciendo las mediciones
* Utilizar el sistema en una proporcion bastante por debajo de la media de utilizacion del resto de *Operadores*
{mo.callout(
mo.md(
'''
En este caso, es importante hablar de **proporción de tiempo** y **no cantidad de tiempo**,
ya que la duración de un turno puede variar de manera bien justificada.
'''
),
kind="info"
)}
"""
)
return
@app.cell(hide_code=True)
def _(mo):
mo.md(
f"""
## Riesgo
Podemos referirnos al riesgo en diferentes maneras, en este apartado nos referimos al riesgo en el contexto donde se asume:
- *Operador* ha demostrado que utiliza adecuadamente el sistema.
- El tiempo de utilizacion es representativo.
- El funcionamiento de la banda está en parametros normales
"""
)
return
@app.cell
def _():
import marimo as mo
import duckdb
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from datetime import timedelta
sns.set_context("notebook")
fatiguelog = duckdb.connect("~/fatiguelog")
return fatiguelog, mo, pd, plt, sns, timedelta
@app.cell
def _(fatiguelog, mo):
df_shifts = mo.sql(
f"""
SELECT DISTINCT
ShiftStartTimestampUtc,
ShiftEndTimestampUtc
FROM fatiguelog.fatiguelog
ORDER BY ShiftStartTimestampUtc;
""",
output=False,
engine=fatiguelog
)
return (df_shifts,)
@app.cell
def _(df_shifts, pd):
df_shifts['ShiftStartTimestampUtc'] = (
pd.to_datetime(df_shifts['ShiftStartTimestampUtc'], utc=True)
#.dt.tz_convert('America/Santiago')
.dt.strftime('%Y-%m-%d %H:%M:%S')
)
df_shifts['ShiftEndTimestampUtc'] = (
pd.to_datetime(df_shifts['ShiftEndTimestampUtc'], utc=True)
#.dt.tz_convert('America/Santiago')
.dt.strftime('%Y-%m-%d %H:%M:%S')
)
df_shifts.sort_values(by='ShiftStartTimestampUtc', inplace=True, ascending=False)
return
@app.cell
def _(df_shifts, mo):
turno = mo.ui.dropdown(
options=dict(
zip(df_shifts['ShiftStartTimestampUtc'], df_shifts['ShiftEndTimestampUtc'])),
label="Turno: ",
value=df_shifts['ShiftStartTimestampUtc'].iloc[0]
)
return (turno,)
@app.cell
def _(mo):
utilization = mo.ui.slider(start=1, stop=11, label="", value=5)
return (utilization,)
@app.cell
def _(mo):
fatigues_types = [
"No Level",
"Fatigue Level 2",
"Fatigue Level 3",
"Fatigue Level 3+",
"Fatigue Level 4",
"LifeBand Removed",
"Poorly fitted cap",
"LifeBand Charging",
"LifeBand Charger Detached",
"No LifeBand Connected",
"Equipment Off"
]
fatigues_selected = mo.ui.multiselect(options=fatigues_types, value=fatigues_types)
return (fatigues_selected,)
@app.cell
def _(fatiguelog, mo, turno):
fatiguelog_name = mo.sql(
f"""
SELECT
Site,
LifebandId,
OperatorEid,
OperatorName,
ShiftStartTimestampUtc::TIMESTAMP AS ShiftStartTimestampUtc,
ShiftEndTimestampUtc::TIMESTAMP AS ShiftEndTimestampUtc,
FatigueStatusName,
MAX(FatigueLogStatusTimestampUtc::TIMESTAMP) - MIN(FatigueLogStatusTimestampUtc::TIMESTAMP) AS utilization,
COUNT(*) AS FatigueStatusCount
FROM
fatiguelog.fatiguelog
WHERE
ShiftStartTimestampUtc::TIMESTAMP >= '{turno.selected_key}'::TIMESTAMP AND
ShiftEndTimestampUtc::TIMESTAMP <= '{turno.value}'::TIMESTAMP
GROUP BY
Site,
LifebandId,
OperatorEid,
OperatorName,
ShiftStartTimestampUtc,
ShiftEndTimestampUtc,
FatigueStatusName
ORDER BY
Site,
OperatorEid,
ShiftStartTimestampUtc;
""",
output=False,
engine=fatiguelog
)
return (fatiguelog_name,)
@app.cell
def _(fatiguelog_name, fatigues_selected, sns, timedelta, utilization):
_df = fatiguelog_name[fatiguelog_name["utilization"] > timedelta(hours=utilization.value)]
_df = _df[_df["FatigueStatusName"].isin(fatigues_selected.value)]
if _df.shape[0] > 0:
g_pturns = sns.catplot(
_df,
y="FatigueStatusName",
x="FatigueStatusCount",
hue="FatigueStatusName",
kind="boxen",
height=5,
aspect=3,
order=fatigues_selected.value,
)
# g.fig.suptitle("Distribución de mediciones por nivel por turno", fontsize=20);
g_pturns.fig.subplots_adjust(top=0.9)
else:
g_pturns = None
return (g_pturns,)
@app.cell
def explicaxion_box_plot(fatigues_selected, mo):
mo.md(
f"""
## Gráfico *Box plot* de medición por tipo nivel turno
Se presenta el *Box Plot* de los niveles medididos de cada turno:
{"".join([f"* {i}\n" for i in fatigues_selected.value])}
"""
)
return
@app.cell
def _(fatigues_selected, g_pturns, mo, turno, utilization):
mo.md(
f"""
***
### Distribucion de lecturas por turno
En esta figura podemos apreciar como se distribuyen las lecturas de la banda
por turno (podemos establecer el parametro de utilización)
- {turno} ➡️ {turno.value}
- lecturas: {fatigues_selected}
- Utilización (>={utilization.value}) {utilization}
{mo.as_html(g_pturns) if g_pturns is not None else ""}
"""
)
return
@app.cell
def _(fatiguelog, fatigues_selected, mo, sns, timedelta, utilization):
_df = mo.sql("""
SELECT
Site,
LifebandId,
OperatorEid,
OperatorName,
ShiftStartTimestampUtc::TIMESTAMP AS ShiftStartTimestampUtc,
ShiftEndTimestampUtc::TIMESTAMP AS ShiftEndTimestampUtc,
FatigueStatusName,
COUNT(*) AS FatigueStatusCount,
MAX(FatigueLogStatusTimestampUtc::TIMESTAMP) - MIN(FatigueLogStatusTimestampUtc::TIMESTAMP) AS utilization
FROM
fatiguelog.fatiguelog
GROUP BY
Site,
LifebandId,
OperatorEid,
OperatorName,
ShiftStartTimestampUtc,
ShiftEndTimestampUtc,
FatigueStatusName
ORDER BY
Site,
OperatorEid,
ShiftStartTimestampUtc;
""", engine=fatiguelog)
_df = _df[_df["utilization"] > timedelta(hours=utilization.value)]
_df = _df[_df["FatigueStatusName"].isin(fatigues_selected.value)]
fatigues_selected.value
if _df.shape[0] > 0:
_g = sns.catplot(
_df[_df["utilization"] > timedelta(hours=utilization.value)],
y="FatigueStatusName",
x="FatigueStatusCount",
hue="FatigueStatusName",
kind="boxen",
height=5,
aspect=3,
order=fatigues_selected.value,
)
# g.fig.suptitle("Distribución de mediciones por nivel por turno", fontsize=20);
_g.fig.subplots_adjust(top=0.9)
else:
_g = None
mo.md(
f"""
***
### ¿Qué ocurre cuando comparamos todos los los turnos?
En esta figura podemos apreciar como se distribuyen las lecturas de la banda
por turno (podemos establecer el parametro de utilización)
- lecturas: {fatigues_selected}
- Utilización (>={utilization.value}) {utilization}
{mo.as_html(_g) if _g is not None else ""}
{
mo.callout("Se aprecia una fuerte presencia de mediciones *No Life Band Connected*.", kind='info')
}
"""
)
return
@app.cell
def _(mo):
mo.md(
r"""
# Estudio *frecuencia* de lecturas por banda
En este apartado se pretende estudiar el comportamiento de las lecturas para cada banda
"""
)
return
@app.cell
def _(fatiguelog, mo, plt, sns):
lecturas_bandas = mo.sql("""
SELECT
LifebandId,
time_bucket(
INTERVAL 60 MINUTE, -- bucket width
FatigueLogStatusTimestampUtc::TIMESTAMP,
INTERVAL 0 MINUTE -- offset
) AS datetime,
count(*) AS Lecturas
FROM
fatiguelog.fatiguelog
GROUP BY ALL
ORDER BY 3
""",
engine=fatiguelog
)
# lecturas_bandas.dropna(subset=["LifebandId"], inplace=True)
_f = plt.figure(figsize=(10, 5))
_gs = _f.add_gridspec(1, 2)
_ax = _f.add_subplot(_gs[0, 0])
_g = sns.scatterplot(data=lecturas_bandas, x="datetime", y="Lecturas", hue="Lecturas", legend=False, size=5)
plt.xticks(rotation=30)
_ax = _f.add_subplot(_gs[0, 1])
_rigth = sns.boxplot(data=lecturas_bandas, y="Lecturas", legend=False)
_rigth.set(ylim=(0, 250))
mo.md(f"""
## Lecturas de banda por hora
{mo.callout("""
Aquí presentamos como las bandas leen por hora, la idea es encontrar bandas específicas que tengan algun comportamiento muy por fuera de lo normal.
""")}
{mo.as_html(_f)}
""")
return (lecturas_bandas,)
@app.cell
def _(lecturas_bandas, mo):
mo.md(
f"""
### Tabla con las lecturas por horas de cada banda
{mo.ui.dataframe(lecturas_bandas)}
"""
)
return
@app.cell(hide_code=True)
def _(fatiguelog, mo):
top_k_bandas = mo.sql(
f"""
WITH hourly_readings AS (
SELECT
LifebandId,
time_bucket(INTERVAL '60 MINUTE', FatigueLogStatusTimestampUtc::TIMESTAMP) AS datetime,
COUNT(*) AS Lecturas
FROM
fatiguelog.fatiguelog
GROUP BY
LifebandId,
time_bucket(INTERVAL '60 MINUTE', FatigueLogStatusTimestampUtc::TIMESTAMP)
),
ranked_bands AS (
SELECT
LifebandId,
datetime,
Lecturas,
-- Asigna un ranking por hora (mayor lecturas = mejor posición)
DENSE_RANK() OVER (
PARTITION BY datetime
ORDER BY Lecturas DESC
) AS hourly_rank
FROM hourly_readings
)
SELECT
LifebandId,
COUNT(*) AS TotalHorasTop
FROM ranked_bands
WHERE
hourly_rank <= 10
GROUP BY LifebandId
ORDER BY TotalHorasTop DESC
LIMIT 10;
""",
engine=fatiguelog
)
return (top_k_bandas,)
@app.cell
def _(mo, top_k_bandas):
table_top_k = mo.ui.table(top_k_bandas, initial_selection=list(range(top_k_bandas.shape[0])))
return (table_top_k,)
@app.cell
def _(lecturas_bandas, mo, plt, sns, table_top_k, top_k_bandas):
_f = plt.figure(figsize=(10, 5))
_gs = _f.add_gridspec(1, 2)
_ax = _f.add_subplot(_gs[0, 0])
sns.lineplot(
lecturas_bandas[lecturas_bandas["LifebandId"].isin(table_top_k.value["LifebandId"])],
x="datetime", y="Lecturas"
)
plt.xticks(rotation=30)
plt.title("Top k bandas que mas leen")
_ax = _f.add_subplot(_gs[0, 1])
sns.lineplot(
lecturas_bandas[~ lecturas_bandas["LifebandId"].isin(top_k_bandas["LifebandId"])],
x="datetime", y="Lecturas"
)
plt.xticks(rotation=30)
plt.title("Lecturas sin bandas top 10")
mo.md(f"""
## Top bandas con reiterado top de lecturas
Ahora mostramos el top 10 de bandas con mayor tasa de lecturas en el tiempo
{table_top_k}
Podemos ver la comparacion directa en la siguiente figura:
{mo.as_html(_f)}
""")
return
if __name__ == "__main__":
app.run()exporto to a qmd (quarto md)
marimo export md FRD-estudio-nuevo.py -o index.qmdrespond all "yes" for every prompt question.
quarto project
then create quarto project
quarto create project chose default named as test then cd in folder test
now, install the marimo-quarto extension:
quarto add marimo-team/quarto-marimoissue part
Now with all prepared
uvx --with marimo --from quarto-cli quarto previewgave this
show log
Watching files for changes
Browse at http://localhost:6733/
Installed 23 packages in 20ms
Bytecode compiled 1589 files in 466ms
MarimoExceptionRaisedError: No module named 'duckdb'
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
MarimoExceptionRaisedError: An ancestor raised an exception (ModuleNotFoundError):
Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.Warning: Only the `disabled` codeblock attribute is utilized for pandoc export. Be sure to set desired code attributes in quarto form.GET: /
on the qmd file i removed the # converting this
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "duckdb==1.3.0",
# "marimo",
# "matplotlib==3.10.3",
# "numpy",
# "pandas==2.2.3",
# "plotly==6.1.2",
# "polars",
# "pyarrow",
# "seaborn==0.13.2",
# "sqlglot",
# ]
# ///into
# /// script
requires-python = ">=3.12"
dependencies = [
"duckdb==1.3.0",
"marimo",
"matplotlib==3.10.3",
"numpy",
"pandas==2.2.3",
"plotly==6.1.2",
"polars",
"pyarrow",
"seaborn==0.13.2",
"sqlglot",
]
# ///note that the error are MarimoExceptionRaisedError: No module named 'duckdb', so i do not know why.
Hope the information are usefull for you.
Thanks!
Actualization
Solve the problem MANUALLY move de import code to top of qmd file.