123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 |
- import psycopg2
- from selenium import webdriver
- from selenium.webdriver.chrome.service import Service
- from selenium.webdriver.common.by import By
- from selenium.webdriver.common.keys import Keys
- from selenium.webdriver.chrome.options import Options
- from selenium.common.exceptions import NoSuchElementException
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- import os
- from flask import Flask, request, jsonify
- from waitress import serve
- import pandas as pd
- from io import StringIO
- # Set options for the Chromium browser
- chrome_options = Options()
- chrome_options.add_argument("--headless") # Optional: Run Chromium in headless mode
- chrome_options.add_argument("--no-sandbox")
- chrome_options.add_argument("--disable-dev-shm-usage")
- # Specify the path to the Chromium driver
- service = Service('/usr/bin/chromedriver')
- driver = None
- def extract(username: str, password: str):
- url_credentials = f'https://{username}:{password}@sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
- url = 'https://sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
- username_integer = int(username[2:])
-
- def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'):
- try:
- conn = psycopg2.connect(
- dbname=os.getenv("DBNAME"),
- user=os.getenv("DBUSER"),
- password=os.getenv("DBPASSWORD"),
- host=os.getenv("DBHOST"),
- port=os.getenv("DBPORT")
- )
- cur = conn.cursor()
- insert_query = """
- INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s))
- ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT;
- """
- cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html))
- conn.commit()
- return cur.query.decode('utf-8')
- except psycopg2.ProgrammingError as e:
- print(f"Error de sintaxis: {e}")
- except psycopg2.IntegrityError as e:
- print(f"Error de integridad: {e}")
- except Exception as e:
- print(f"Error: {e}")
- finally:
- cur.close()
- conn.close()
-
- def update_alumno_extraccion_error(error: str):
- try:
-
- conn = psycopg2.connect(
- dbname=os.getenv("DBNAME"),
- user=os.getenv("DBUSER"),
- password=os.getenv("DBPASSWORD"),
- host=os.getenv("DBHOST"),
- port=os.getenv("DBPORT")
- )
- cur = conn.cursor()
- update_query = """
- INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s)
- ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message,
- materias_html = DEFAULT, registrado = DEFAULT;
- """
- cur.execute(update_query, (username_integer, error[:255]))
- conn.commit()
- print("Data updated successfully")
- except psycopg2.ProgrammingError as e:
- print(f"Error de sintaxis: {e}")
-
- finally:
- cur.close()
- conn.close()
- try:
- driver.get(url_credentials)
- driver.get(url)
- # si no existe el elemento, ctl00_contenedor_control
- datos_html = driver.find_element(By.ID, 'ctl00_contenedor_control').get_attribute('innerHTML')
-
- elemento = WebDriverWait(driver, 3.5).until(
- EC.presence_of_element_located((By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico'))
- )
- elemento.click()
-
- # Get the HTML content of the materias element
- materias_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_divHAcademico').get_attribute('innerHTML')
- historial_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_gvMaterias').get_attribute('innerHTML')
- # materias_actuales_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_div13').get_attribute('innerHTML')
-
- historial_html_io = StringIO(f"<table>{historial_html}</table>")
- # Read the HTML table into a DataFrame
- df = pd.read_html(historial_html_io)[0]
-
-
- if 'GRUPO' not in df.columns:
- raise KeyError("Column 'GRUPO' not found in the DataFrame")
- # Convert the DataFrame to JSON
- json_result = df[df['GRUPO'] != 'Promedio:'].to_json(orient='records')
-
- # Connect to PostgreSQL database
- query = insert_alumno_extraccion(datos_html, materias_html, json_result)
-
- print("Data extracted successfully")
- return json_result
- except NoSuchElementException as e:
- update_alumno_extraccion_error(str(e))
- def se_puede_extraer():
- try:
- conn = conn = psycopg2.connect(
- dbname=os.getenv("DBNAME"),
- user=os.getenv("DBUSER"),
- password=os.getenv("DBPASSWORD"),
- host=os.getenv("DBHOST"),
- port=os.getenv("DBPORT")
- )
- cursor = conn.cursor()
-
- # SELECCIONAR ULTIMA planeacion
-
-
- query = """
- SELECT 1
- FROM alumno_extraccion_fecha
- WHERE CURRENT_DATE BETWEEN fecha_inicio AND fecha_fin
- ORDER BY CREATED_AT DESC
- LIMIT 1;
- """
- # Ejecuta la consulta
- cursor.execute(query)
- result = cursor.fetchone()
-
- # Verifica si se obtuvo algún resultado
- exists = result is not None
-
- # Cierra el cursor y la conexión
- cursor.close()
- conn.close()
-
- return exists
- except Exception as e:
- print(f"Error: {e}")
- return False
- app = Flask(__name__)
- @app.route('/calificaciones', methods=['POST'])
- def main():
- global driver
- # Initialize the WebDriver
- driver = webdriver.Chrome(service=service, options=chrome_options)
-
- username = request.form.get('clave')
- password = request.form.get('password')
- se_puede = se_puede_extraer()
- if se_puede:
- query = extract(username, password)
-
- # Close the session
- driver.quit()
-
- return jsonify({"message": "Data extracted successfully", "en-fecha": se_puede})
- if __name__ == '__main__':
- serve(app, host='0.0.0.0', port=5000)
-
|