import psycopg2 from selenium import webdriver from selenium.webdriver.chrome.service import Service from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from selenium.webdriver.chrome.options import Options from selenium.common.exceptions import NoSuchElementException from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC import os from flask import Flask, request, jsonify from waitress import serve import pandas as pd from io import StringIO # Set options for the Chromium browser chrome_options = Options() chrome_options.add_argument("--headless") # Optional: Run Chromium in headless mode chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--disable-dev-shm-usage") # Specify the path to the Chromium driver service = Service('/usr/bin/chromedriver') driver = None def extract(username: str, password: str): url_credentials = f'https://{username}:{password}@sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx' url = 'https://sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx' username_integer = int(username[2:]) def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'): try: conn = psycopg2.connect( dbname=os.getenv("DBNAME"), user=os.getenv("DBUSER"), password=os.getenv("DBPASSWORD"), host=os.getenv("DBHOST"), port=os.getenv("DBPORT") ) cur = conn.cursor() insert_query = """ INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s)) ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT; """ cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html)) conn.commit() return cur.query.decode('utf-8') except psycopg2.ProgrammingError as e: print(f"Error de sintaxis: {e}") except psycopg2.IntegrityError as e: print(f"Error de integridad: {e}") except Exception as e: print(f"Error: {e}") finally: cur.close() conn.close() def update_alumno_extraccion_error(error: str): try: conn = psycopg2.connect( dbname=os.getenv("DBNAME"), user=os.getenv("DBUSER"), password=os.getenv("DBPASSWORD"), host=os.getenv("DBHOST"), port=os.getenv("DBPORT") ) cur = conn.cursor() update_query = """ INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s) ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message, materias_html = DEFAULT, registrado = DEFAULT; """ cur.execute(update_query, (username_integer, error[:255])) conn.commit() print("Data updated successfully") except psycopg2.ProgrammingError as e: print(f"Error de sintaxis: {e}") finally: cur.close() conn.close() try: driver.get(url_credentials) driver.get(url) # si no existe el elemento, ctl00_contenedor_control datos_html = driver.find_element(By.ID, 'ctl00_contenedor_control').get_attribute('innerHTML') elemento = WebDriverWait(driver, 3.5).until( EC.presence_of_element_located((By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico')) ) elemento.click() # Get the HTML content of the materias element materias_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_divHAcademico').get_attribute('innerHTML') historial_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_gvMaterias').get_attribute('innerHTML') # materias_actuales_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_div13').get_attribute('innerHTML') historial_html_io = StringIO(f"{historial_html}
") # Read the HTML table into a DataFrame df = pd.read_html(historial_html_io)[0] if 'GRUPO' not in df.columns: raise KeyError("Column 'GRUPO' not found in the DataFrame") # Convert the DataFrame to JSON json_result = df[df['GRUPO'] != 'Promedio:'].to_json(orient='records') # Connect to PostgreSQL database query = insert_alumno_extraccion(datos_html, materias_html, json_result) print("Data extracted successfully") return json_result except NoSuchElementException as e: update_alumno_extraccion_error(str(e)) def se_puede_extraer(): try: conn = conn = psycopg2.connect( dbname=os.getenv("DBNAME"), user=os.getenv("DBUSER"), password=os.getenv("DBPASSWORD"), host=os.getenv("DBHOST"), port=os.getenv("DBPORT") ) cursor = conn.cursor() # SELECCIONAR ULTIMA planeacion query = """ SELECT 1 FROM alumno_extraccion_fecha WHERE CURRENT_DATE BETWEEN fecha_inicio AND fecha_fin ORDER BY CREATED_AT DESC LIMIT 1; """ # Ejecuta la consulta cursor.execute(query) result = cursor.fetchone() # Verifica si se obtuvo algún resultado exists = result is not None # Cierra el cursor y la conexión cursor.close() conn.close() return exists except Exception as e: print(f"Error: {e}") return False app = Flask(__name__) @app.route('/calificaciones', methods=['POST']) def main(): global driver # Initialize the WebDriver driver = webdriver.Chrome(service=service, options=chrome_options) username = request.form.get('clave') password = request.form.get('password') se_puede = se_puede_extraer() if se_puede: query = extract(username, password) # Close the session driver.quit() return jsonify({"message": "Data extracted successfully", "en-fecha": se_puede}) if __name__ == '__main__': serve(app, host='0.0.0.0', port=5000)